add editorconfig

Add a .editorconfig file for qemu. Specifies the indent and tab style for various files (C code and Makefiles for starters). Most popular editors support this either natively or via plugin. Check http://editorconfig.org/ for details. Signed-off-by: Gerd Hoffmann <kraxel@redhat.com> Message-id: 20170717101547.22295-1-kraxel@redhat.com
add scripts/git.orderfile
2017-07-20 09:56:56 +02:00 · 2017-07-20 09:56:41 +02:00 · 2017-07-19 20:45:37 +01:00 · 2017-07-19 17:51:36 +01:00 · 2017-07-19 16:31:08 +01:00 · 2017-07-19 14:42:18 +01:00
790 changed files with 35122 additions and 12104 deletions
--- a/.editorconfig
+++ b/.editorconfig
@@ -0,0 +1,15 @@
+# http://editorconfig.org
+root = true
+
+[*]
+end_of_line = lf
+insert_final_newline = true
+charset = utf-8
+
+[Makefile*]
+indent_style = tab
+indent_size = 8
+
+[*.{c,h}]
+indent_style = space
+indent_size = 4
--- a/.travis.yml
+++ b/.travis.yml
@@ -86,6 +86,9 @@ matrix:
    - env: CONFIG="--enable-trace-backends=ust"
           TEST_CMD=""
      compiler: gcc
+    - env: CONFIG="--disable-tcg"
+           TEST_CMD=""
+      compiler: gcc
    - env: CONFIG=""
      os: osx
      compiler: clang
--- a/63
+++ b/63
@@ -84,14 +84,10 @@ M: Paolo Bonzini <pbonzini@redhat.com>
 M: Peter Crosthwaite <crosthwaite.peter@gmail.com>
 M: Richard Henderson <rth@twiddle.net>
 S: Maintained
-F: cpu-exec.c
-F: cpu-exec-common.c
 F: cpus.c
-F: cputlb.c
 F: exec.c
 F: softmmu_template.h
-F: translate-all.*
-F: translate-common.c
+F: accel/tcg/
 F: include/exec/cpu*.h
 F: include/exec/exec-all.h
 F: include/exec/helper*.h
@@ -277,8 +273,8 @@ Overall
 M: Paolo Bonzini <pbonzini@redhat.com>
 L: kvm@vger.kernel.org
 S: Supported
-F: kvm-*
 F: */kvm.*
+F: accel/kvm/
 F: include/sysemu/kvm*.h

 ARM
@@ -299,7 +295,7 @@ F: target/ppc/kvm.c

 S390
 M: Christian Borntraeger <borntraeger@de.ibm.com>
-M: Cornelia Huck <cornelia.huck@de.ibm.com>
+M: Cornelia Huck <cohuck@redhat.com>
 M: Alexander Graf <agraf@suse.de>
 S: Maintained
 F: target/s390x/kvm.c
@@ -327,7 +323,6 @@ M: Stefano Stabellini <sstabellini@kernel.org>
 M: Anthony Perard <anthony.perard@citrix.com>
 L: xen-devel@lists.xenproject.org
 S: Supported
-F: xen-*
 F: */xen*
 F: hw/9pfs/xen-9p-backend.c
 F: hw/char/xen_console.c
@@ -380,7 +375,7 @@ F: hw/*/allwinner*
 F: include/hw/*/allwinner*
 F: hw/arm/cubieboard.c

-ARM PrimeCell
+ARM PrimeCell and CMSDK devices
 M: Peter Maydell <peter.maydell@linaro.org>
 L: qemu-arm@nongnu.org
 S: Maintained
@@ -394,6 +389,10 @@ F: hw/intc/pl190.c
 F: hw/sd/pl181.c
 F: hw/timer/pl031.c
 F: include/hw/arm/primecell.h
+F: hw/timer/cmsdk-apb-timer.c
+F: include/hw/timer/cmsdk-apb-timer.h
+F: hw/char/cmsdk-apb-uart.c
+F: include/hw/char/cmsdk-apb-uart.h

 ARM cores
 M: Peter Maydell <peter.maydell@linaro.org>
@@ -455,6 +454,14 @@ S: Maintained
 F: hw/arm/integratorcp.c
 F: hw/misc/arm_integrator_debug.c

+MPS2
+M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: hw/arm/mps2.c
+F: hw/misc/mps2-scc.c
+F: include/hw/misc/mps2-scc.h
+
 Musicpal
 M: Jan Kiszka <jan.kiszka@web.de>
 L: qemu-arm@nongnu.org
@@ -778,7 +785,7 @@ F: include/hw/sparc/grlib.h
 S390 Machines
 -------------
 S390 Virtio-ccw
-M: Cornelia Huck <cornelia.huck@de.ibm.com>
+M: Cornelia Huck <cohuck@redhat.com>
 M: Christian Borntraeger <borntraeger@de.ibm.com>
 M: Alexander Graf <agraf@suse.de>
 S: Supported
@@ -1006,7 +1013,7 @@ F: hw/vfio/*
 F: include/hw/vfio/

 vfio-ccw
-M: Cornelia Huck <cornelia.huck@de.ibm.com>
+M: Cornelia Huck <cohuck@redhat.com>
 S: Supported
 F: hw/vfio/ccw.c
 F: hw/s390x/s390-ccw.c
@@ -1048,7 +1055,7 @@ F: tests/virtio-blk-test.c
 T: git git://github.com/stefanha/qemu.git block

 virtio-ccw
-M: Cornelia Huck <cornelia.huck@de.ibm.com>
+M: Cornelia Huck <cohuck@redhat.com>
 M: Christian Borntraeger <borntraeger@de.ibm.com>
 S: Supported
 F: hw/s390x/virtio-ccw.[hc]
@@ -1160,6 +1167,13 @@ F: docs/specs/vmgenid.txt
 F: tests/vmgenid-test.c
 F: stubs/vmgenid.c

+Unimplemented device
+M: Peter Maydell <peter.maydell@linaro.org>
+R: Philippe Mathieu-Daudé <f4bug@amsat.org>
+S: Maintained
+F: include/hw/misc/unimp.h
+F: hw/misc/unimp.c
+
 Subsystems
 ----------
 Audio
@@ -1348,15 +1362,6 @@ W: http://info.iet.unipi.it/~luigi/netmap/
 S: Maintained
 F: net/netmap.c

-Network Block Device (NBD)
-M: Paolo Bonzini <pbonzini@redhat.com>
-S: Odd Fixes
-F: block/nbd*
-F: nbd/
-F: include/block/nbd*
-F: qemu-nbd.c
-T: git git://github.com/bonzini/qemu.git nbd-next
-
 NUMA
 M: Eduardo Habkost <ehabkost@redhat.com>
 S: Maintained
@@ -1489,7 +1494,7 @@ F: tests/vmstate-static-checker-data/
 F: docs/migration.txt

 Seccomp
-M: Eduardo Otubo <eduardo.otubo@profitbricks.com>
+M: Eduardo Otubo <otubo@redhat.com>
 S: Supported
 F: qemu-seccomp.c
 F: include/sysemu/seccomp.h
@@ -1659,7 +1664,7 @@ TCI target
 M: Stefan Weil <sw@weilnetz.de>
 S: Maintained
 F: tcg/tci/
-F: tci.c
+F: tcg/tci.c
 F: disas/tci.c

 Block drivers
@@ -1710,6 +1715,18 @@ S: Supported
 F: block/iscsi.c
 F: block/iscsi-opts.c

+Network Block Device (NBD)
+M: Eric Blake <eblake@redhat.com>
+M: Paolo Bonzini <pbonzini@redhat.com>
+L: qemu-block@nongnu.org
+S: Maintained
+F: block/nbd*
+F: nbd/
+F: include/block/nbd*
+F: qemu-nbd.*
+F: blockdev-nbd.c
+T: git git://repo.or.cz/qemu/ericb.git nbd
+
 NFS
 M: Jeff Cody <jcody@redhat.com>
 M: Peter Lieven <pl@kamp.de>
--- a/2
+++ b/2
@@ -553,7 +553,7 @@ efi-e1000e.rom efi-vmxnet3.rom \
 qemu-icon.bmp qemu_logo_no_text.svg \
 bamboo.dtb petalogix-s3adsp1800.dtb petalogix-ml605.dtb \
 multiboot.bin linuxboot.bin linuxboot_dma.bin kvmvapic.bin \
-s390-ccw.img \
+s390-ccw.img s390-netboot.img \
 spapr-rtas.bin slof.bin skiboot.lid \
 palcode-clipper \
 u-boot.e500 \
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -40,7 +40,7 @@ io-obj-y = io/

 ifeq ($(CONFIG_SOFTMMU),y)
 common-obj-y = blockdev.o blockdev-nbd.o block/
-common-obj-y += iothread.o
+common-obj-y += bootdevice.o iothread.o
 common-obj-y += net/
 common-obj-y += qdev-monitor.o device-hotplug.o
 common-obj-$(CONFIG_WIN32) += os-win32.o
@@ -122,7 +122,6 @@ trace-events-subdirs += crypto
 trace-events-subdirs += io
 trace-events-subdirs += migration
 trace-events-subdirs += block
-trace-events-subdirs += backends
 trace-events-subdirs += chardev
 trace-events-subdirs += hw/block
 trace-events-subdirs += hw/block/dataplane
@@ -168,6 +167,7 @@ trace-events-subdirs += linux-user
 trace-events-subdirs += qapi
 trace-events-subdirs += accel/tcg
 trace-events-subdirs += accel/kvm
+trace-events-subdirs += nbd

 trace-events-files = $(SRC_PATH)/trace-events $(trace-events-subdirs:%=$(SRC_PATH)/%/trace-events)

--- a/Makefile.target
+++ b/Makefile.target
@@ -90,8 +90,8 @@ all: $(PROGS) stap
 # cpu emulator library
 obj-y += exec.o
 obj-y += accel/
-obj-y += tcg/tcg.o tcg/tcg-op.o tcg/optimize.o
-obj-y += tcg/tcg-common.o tcg/tcg-runtime.o
+obj-$(CONFIG_TCG) += tcg/tcg.o tcg/tcg-op.o tcg/optimize.o
+obj-$(CONFIG_TCG) += tcg/tcg-common.o tcg/tcg-runtime.o
 obj-$(CONFIG_TCG_INTERPRETER) += tcg/tci.o
 obj-$(CONFIG_TCG_INTERPRETER) += disas/tci.o
 obj-y += fpu/softfloat.o
@@ -137,7 +137,7 @@ endif #CONFIG_BSD_USER
 # System emulator target
 ifdef CONFIG_SOFTMMU
 obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o numa.o
-obj-y += qtest.o bootdevice.o
+obj-y += qtest.o
 obj-y += hw/
 obj-y += memory.o
 obj-y += memory_mapping.o
--- a/accel/Makefile.objs
+++ b/accel/Makefile.objs
@@ -1,4 +1,4 @@
 obj-$(CONFIG_SOFTMMU) += accel.o
 obj-y += kvm/
-obj-y += tcg/
+obj-$(CONFIG_TCG) += tcg/
 obj-y += stubs/
--- a/accel/accel.c
+++ b/accel/accel.c
@@ -120,6 +120,12 @@ void configure_accelerator(MachineState *ms)
    }
 }

+void accel_register_compat_props(AccelState *accel)
+{
+    AccelClass *class = ACCEL_GET_CLASS(accel);
+    register_compat_props_array(class->global_props);
+}
+
 static void register_accel_types(void)
 {
    type_register_static(&accel_type);
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -318,7 +318,7 @@ int kvm_init_vcpu(CPUState *cpu)

    cpu->kvm_fd = ret;
    cpu->kvm_state = s;
-    cpu->kvm_vcpu_dirty = true;
+    cpu->vcpu_dirty = true;

    mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
    if (mmap_size < 0) {
@@ -981,15 +981,6 @@ static MemoryListener kvm_io_listener = {
    .priority = 10,
 };

-static void kvm_handle_interrupt(CPUState *cpu, int mask)
-{
-    cpu->interrupt_request |= mask;
-
-    if (!qemu_cpu_is_self(cpu)) {
-        qemu_cpu_kick(cpu);
-    }
-}
-
 int kvm_set_irq(KVMState *s, int irq, int level)
 {
    struct kvm_irq_level event;
@@ -1774,8 +1765,6 @@ static int kvm_init(MachineState *ms)

    s->many_ioeventfds = kvm_check_many_ioeventfds();

-    cpu_interrupt_handler = kvm_handle_interrupt;
-
    return 0;

 err:
@@ -1864,15 +1853,15 @@ void kvm_flush_coalesced_mmio_buffer(void)

 static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
 {
-    if (!cpu->kvm_vcpu_dirty) {
+    if (!cpu->vcpu_dirty) {
        kvm_arch_get_registers(cpu);
-        cpu->kvm_vcpu_dirty = true;
+        cpu->vcpu_dirty = true;
    }
 }

 void kvm_cpu_synchronize_state(CPUState *cpu)
 {
-    if (!cpu->kvm_vcpu_dirty) {
+    if (!cpu->vcpu_dirty) {
        run_on_cpu(cpu, do_kvm_cpu_synchronize_state, RUN_ON_CPU_NULL);
    }
 }
@@ -1880,7 +1869,7 @@ void kvm_cpu_synchronize_state(CPUState *cpu)
 static void do_kvm_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg)
 {
    kvm_arch_put_registers(cpu, KVM_PUT_RESET_STATE);
-    cpu->kvm_vcpu_dirty = false;
+    cpu->vcpu_dirty = false;
 }

 void kvm_cpu_synchronize_post_reset(CPUState *cpu)
@@ -1891,7 +1880,7 @@ void kvm_cpu_synchronize_post_reset(CPUState *cpu)
 static void do_kvm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
 {
    kvm_arch_put_registers(cpu, KVM_PUT_FULL_STATE);
-    cpu->kvm_vcpu_dirty = false;
+    cpu->vcpu_dirty = false;
 }

 void kvm_cpu_synchronize_post_init(CPUState *cpu)
@@ -1901,7 +1890,7 @@ void kvm_cpu_synchronize_post_init(CPUState *cpu)

 static void do_kvm_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg)
 {
-    cpu->kvm_vcpu_dirty = true;
+    cpu->vcpu_dirty = true;
 }

 void kvm_cpu_synchronize_pre_loadvm(CPUState *cpu)
@@ -1982,9 +1971,9 @@ int kvm_cpu_exec(CPUState *cpu)
    do {
        MemTxAttrs attrs;

-        if (cpu->kvm_vcpu_dirty) {
+        if (cpu->vcpu_dirty) {
            kvm_arch_put_registers(cpu, KVM_PUT_RUNTIME_STATE);
-            cpu->kvm_vcpu_dirty = false;
+            cpu->vcpu_dirty = false;
        }

        kvm_arch_pre_run(cpu, run);
@@ -2285,6 +2274,11 @@ int kvm_has_intx_set_mask(void)
    return kvm_state->intx_set_mask;
 }

+bool kvm_arm_supports_user_irq(void)
+{
+    return kvm_check_extension(kvm_state, KVM_CAP_ARM_USER_IRQ);
+}
+
 #ifdef KVM_CAP_SET_GUEST_DEBUG
 struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *cpu,
                                                 target_ulong pc)
--- a/accel/stubs/Makefile.objs
+++ b/accel/stubs/Makefile.objs
@@ -1 +1,2 @@
 obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o
+obj-$(call lnot,$(CONFIG_TCG)) += tcg-stub.o
--- a/accel/stubs/kvm-stub.c
+++ b/accel/stubs/kvm-stub.c
@@ -155,4 +155,9 @@ void kvm_init_cpu_signals(CPUState *cpu)
 {
    abort();
 }
+
+bool kvm_arm_supports_user_irq(void)
+{
+    return false;
+}
 #endif
--- a/accel/stubs/tcg-stub.c
+++ b/accel/stubs/tcg-stub.c
@@ -0,0 +1,22 @@
+/*
+ * QEMU TCG accelerator stub
+ *
+ * Copyright Red Hat, Inc. 2013
+ *
+ * Author: Paolo Bonzini     <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "cpu.h"
+#include "tcg/tcg.h"
+#include "exec/cpu-common.h"
+#include "exec/exec-all.h"
+
+void tb_flush(CPUState *cpu)
+{
+}
--- a/accel/tcg/Makefile.objs
+++ b/accel/tcg/Makefile.objs
@@ -1,3 +1,3 @@
 obj-$(CONFIG_SOFTMMU) += tcg-all.o
 obj-$(CONFIG_SOFTMMU) += cputlb.o
-obj-y += cpu-exec.o cpu-exec-common.o translate-all.o translate-common.o
+obj-y += cpu-exec.o cpu-exec-common.o translate-all.o
--- a/accel/tcg/cpu-exec-common.c
+++ b/accel/tcg/cpu-exec-common.c
@@ -23,6 +23,8 @@
 #include "exec/exec-all.h"
 #include "exec/memory-internal.h"

+bool tcg_allowed;
+
 /* exit the current TB, but without causing any exception to be raised */
 void cpu_loop_exit_noexc(CPUState *cpu)
 {
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -280,6 +280,7 @@ struct tb_desc {
    CPUArchState *env;
    tb_page_addr_t phys_page1;
    uint32_t flags;
+    uint32_t trace_vcpu_dstate;
 };

 static bool tb_cmp(const void *p, const void *d)
@@ -291,6 +292,7 @@ static bool tb_cmp(const void *p, const void *d)
        tb->page_addr[0] == desc->phys_page1 &&
        tb->cs_base == desc->cs_base &&
        tb->flags == desc->flags &&
+        tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
        !atomic_read(&tb->invalid)) {
        /* check next page if needed */
        if (tb->page_addr[1] == -1) {
@@ -319,10 +321,11 @@ TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
    desc.env = (CPUArchState *)cpu->env_ptr;
    desc.cs_base = cs_base;
    desc.flags = flags;
+    desc.trace_vcpu_dstate = *cpu->trace_dstate;
    desc.pc = pc;
    phys_pc = get_page_addr_code(desc.env, pc);
    desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
-    h = tb_hash_func(phys_pc, pc, flags);
+    h = tb_hash_func(phys_pc, pc, flags, *cpu->trace_dstate);
    return qht_lookup(&tcg_ctx.tb_ctx.htable, tb_cmp, &desc, h);
 }

@@ -342,7 +345,8 @@ static inline TranslationBlock *tb_find(CPUState *cpu,
    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
    tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]);
    if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base ||
-                 tb->flags != flags)) {
+                 tb->flags != flags ||
+                 tb->trace_vcpu_dstate != *cpu->trace_dstate)) {
        tb = tb_htable_lookup(cpu, pc, cs_base, flags);
        if (!tb) {

--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -118,7 +118,7 @@ static void tlb_flush_nocheck(CPUState *cpu)

    memset(env->tlb_table, -1, sizeof(env->tlb_table));
    memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table));
-    memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));
+    cpu_tb_jmp_cache_clear(cpu);

    env->vtlb_index = 0;
    env->tlb_flush_addr = -1;
@@ -183,7 +183,7 @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
        }
    }

-    memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));
+    cpu_tb_jmp_cache_clear(cpu);

    tlb_debug("done\n");

@@ -746,41 +746,6 @@ static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
    return ram_addr;
 }

-/* NOTE: this function can trigger an exception */
-/* NOTE2: the returned address is not exactly the physical address: it
- * is actually a ram_addr_t (in system mode; the user mode emulation
- * version of this function returns a guest virtual address).
- */
-tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
-{
-    int mmu_idx, page_index, pd;
-    void *p;
-    MemoryRegion *mr;
-    CPUState *cpu = ENV_GET_CPU(env1);
-    CPUIOTLBEntry *iotlbentry;
-
-    page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
-    mmu_idx = cpu_mmu_index(env1, true);
-    if (unlikely(env1->tlb_table[mmu_idx][page_index].addr_code !=
-                 (addr & TARGET_PAGE_MASK))) {
-        cpu_ldub_code(env1, addr);
-    }
-    iotlbentry = &env1->iotlb[mmu_idx][page_index];
-    pd = iotlbentry->addr & ~TARGET_PAGE_MASK;
-    mr = iotlb_to_region(cpu, pd, iotlbentry->attrs);
-    if (memory_region_is_unassigned(mr)) {
-        cpu_unassigned_access(cpu, addr, false, true, 0, 4);
-        /* The CPU's unassigned access hook might have longjumped out
-         * with an exception. If it didn't (or there was no hook) then
-         * we can't proceed further.
-         */
-        report_bad_exec(cpu, addr);
-        exit(1);
-    }
-    p = (void *)((uintptr_t)addr + env1->tlb_table[mmu_idx][page_index].addend);
-    return qemu_ram_addr_from_host_nofail(p);
-}
-
 static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
                         target_ulong addr, uintptr_t retaddr, int size)
 {
@@ -868,6 +833,53 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
  victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
                 (ADDR) & TARGET_PAGE_MASK)

+/* NOTE: this function can trigger an exception */
+/* NOTE2: the returned address is not exactly the physical address: it
+ * is actually a ram_addr_t (in system mode; the user mode emulation
+ * version of this function returns a guest virtual address).
+ */
+tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
+{
+    int mmu_idx, index, pd;
+    void *p;
+    MemoryRegion *mr;
+    CPUState *cpu = ENV_GET_CPU(env);
+    CPUIOTLBEntry *iotlbentry;
+
+    index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+    mmu_idx = cpu_mmu_index(env, true);
+    if (unlikely(env->tlb_table[mmu_idx][index].addr_code !=
+                 (addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK)))) {
+        if (!VICTIM_TLB_HIT(addr_read, addr)) {
+            tlb_fill(ENV_GET_CPU(env), addr, MMU_INST_FETCH, mmu_idx, 0);
+        }
+    }
+    iotlbentry = &env->iotlb[mmu_idx][index];
+    pd = iotlbentry->addr & ~TARGET_PAGE_MASK;
+    mr = iotlb_to_region(cpu, pd, iotlbentry->attrs);
+    if (memory_region_is_unassigned(mr)) {
+        qemu_mutex_lock_iothread();
+        if (memory_region_request_mmio_ptr(mr, addr)) {
+            qemu_mutex_unlock_iothread();
+            /* A MemoryRegion is potentially added so re-run the
+             * get_page_addr_code.
+             */
+            return get_page_addr_code(env, addr);
+        }
+        qemu_mutex_unlock_iothread();
+
+        cpu_unassigned_access(cpu, addr, false, true, 0, 4);
+        /* The CPU's unassigned access hook might have longjumped out
+         * with an exception. If it didn't (or there was no hook) then
+         * we can't proceed further.
+         */
+        report_bad_exec(cpu, addr);
+        exit(1);
+    }
+    p = (void *)((uintptr_t)addr + env->tlb_table[mmu_idx][index].addend);
+    return qemu_ram_addr_from_host_nofail(p);
+}
+
 /* Probe for whether the specified guest write access is permitted.
 * If it is not permitted then an exception will be taken in the same
 * way as if this were a real write access (and we will not return).
--- a/accel/tcg/tcg-all.c
+++ b/accel/tcg/tcg-all.c
@@ -27,13 +27,44 @@
 #include "sysemu/accel.h"
 #include "sysemu/sysemu.h"
 #include "qom/object.h"
+#include "qemu-common.h"
+#include "qom/cpu.h"
+#include "sysemu/cpus.h"
+#include "qemu/main-loop.h"

-int tcg_tb_size;
-static bool tcg_allowed = true;
+unsigned long tcg_tb_size;
+
+#ifndef CONFIG_USER_ONLY
+/* mask must never be zero, except for A20 change call */
+static void tcg_handle_interrupt(CPUState *cpu, int mask)
+{
+    int old_mask;
+    g_assert(qemu_mutex_iothread_locked());
+
+    old_mask = cpu->interrupt_request;
+    cpu->interrupt_request |= mask;
+
+    /*
+     * If called from iothread context, wake the target cpu in
+     * case its halted.
+     */
+    if (!qemu_cpu_is_self(cpu)) {
+        qemu_cpu_kick(cpu);
+    } else {
+        cpu->icount_decr.u16.high = -1;
+        if (use_icount &&
+            !cpu->can_do_io
+            && (mask & ~old_mask) != 0) {
+            cpu_abort(cpu, "Raised interrupt while not in I/O function");
+        }
+    }
+}
+#endif

 static int tcg_init(MachineState *ms)
 {
    tcg_exec_init(tcg_tb_size * 1024 * 1024);
+    cpu_interrupt_handler = tcg_handle_interrupt;
    return 0;
 }

--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -54,6 +54,7 @@
 #include "exec/tb-hash.h"
 #include "translate-all.h"
 #include "qemu/bitmap.h"
+#include "qemu/error-report.h"
 #include "qemu/timer.h"
 #include "qemu/main-loop.h"
 #include "exec/log.h"
@@ -112,8 +113,10 @@ typedef struct PageDesc {
 #define V_L2_BITS 10
 #define V_L2_SIZE (1 << V_L2_BITS)

-uintptr_t qemu_host_page_size;
-intptr_t qemu_host_page_mask;
+/* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */
+QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
+                  sizeof(((TranslationBlock *)0)->trace_vcpu_dstate)
+                  * BITS_PER_BYTE);

 /*
 * L1 Mapping properties
@@ -363,21 +366,6 @@ bool cpu_restore_state(CPUState *cpu, uintptr_t retaddr)
    return r;
 }

-void page_size_init(void)
-{
-    /* NOTE: we can always suppose that qemu_host_page_size >=
-       TARGET_PAGE_SIZE */
-    qemu_real_host_page_size = getpagesize();
-    qemu_real_host_page_mask = -(intptr_t)qemu_real_host_page_size;
-    if (qemu_host_page_size == 0) {
-        qemu_host_page_size = qemu_real_host_page_size;
-    }
-    if (qemu_host_page_size < TARGET_PAGE_SIZE) {
-        qemu_host_page_size = TARGET_PAGE_SIZE;
-    }
-    qemu_host_page_mask = -(intptr_t)qemu_host_page_size;
-}
-
 static void page_init(void)
 {
    page_size_init();
@@ -522,7 +510,7 @@ static inline PageDesc *page_find(tb_page_addr_t index)
 #elif defined(__powerpc__)
 # define MAX_CODE_GEN_BUFFER_SIZE  (32u * 1024 * 1024)
 #elif defined(__aarch64__)
-# define MAX_CODE_GEN_BUFFER_SIZE  (128ul * 1024 * 1024)
+# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
 #elif defined(__s390x__)
  /* We have a +- 4GB range on the branches; leave some slop.  */
 # define MAX_CODE_GEN_BUFFER_SIZE  (3ul * 1024 * 1024 * 1024)
@@ -802,6 +790,7 @@ static void tb_htable_init(void)
   size. */
 void tcg_exec_init(unsigned long tb_size)
 {
+    tcg_allowed = true;
    cpu_gen_init();
    page_init();
    tb_htable_init();
@@ -813,11 +802,6 @@ void tcg_exec_init(unsigned long tb_size)
 #endif
 }

-bool tcg_enabled(void)
-{
-    return tcg_ctx.code_gen_buffer != NULL;
-}
-
 /*
 * Allocate a new translation block. Flush the translation buffer if
 * too many translation blocks or too much generated code.
@@ -928,11 +912,7 @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
    }

    CPU_FOREACH(cpu) {
-        int i;
-
-        for (i = 0; i < TB_JMP_CACHE_SIZE; ++i) {
-            atomic_set(&cpu->tb_jmp_cache[i], NULL);
-        }
+        cpu_tb_jmp_cache_clear(cpu);
    }

    tcg_ctx.tb_ctx.nb_tbs = 0;
@@ -1097,7 +1077,7 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)

    /* remove the TB from the hash list */
    phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
-    h = tb_hash_func(phys_pc, tb->pc, tb->flags);
+    h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->trace_vcpu_dstate);
    qht_remove(&tcg_ctx.tb_ctx.htable, tb, h);

    /* remove the TB from the page list */
@@ -1242,7 +1222,7 @@ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
    }

    /* add in the hash table */
-    h = tb_hash_func(phys_pc, tb->pc, tb->flags);
+    h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->trace_vcpu_dstate);
    qht_insert(&tcg_ctx.tb_ctx.htable, tb, h);

 #ifdef DEBUG_TB_CHECK
@@ -1288,6 +1268,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
    tb->cs_base = cs_base;
    tb->flags = flags;
    tb->cflags = cflags;
+    tb->trace_vcpu_dstate = *cpu->trace_dstate;
    tb->invalid = false;

 #ifdef CONFIG_PROFILER
@@ -1813,19 +1794,21 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
    cpu_loop_exit_noexc(cpu);
 }

+static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr)
+{
+    unsigned int i, i0 = tb_jmp_cache_hash_page(page_addr);
+
+    for (i = 0; i < TB_JMP_PAGE_SIZE; i++) {
+        atomic_set(&cpu->tb_jmp_cache[i0 + i], NULL);
+    }
+}
+
 void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr)
 {
-    unsigned int i;
-
    /* Discard jump cache entries for any tb which might potentially
       overlap the flushed page.  */
-    i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
-    memset(&cpu->tb_jmp_cache[i], 0,
-           TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
-
-    i = tb_jmp_cache_hash_page(addr);
-    memset(&cpu->tb_jmp_cache[i], 0,
-           TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
+    tb_jmp_cache_clear_page(cpu, addr - TARGET_PAGE_SIZE);
+    tb_jmp_cache_clear_page(cpu, addr);
 }

 static void print_qht_statistics(FILE *f, fprintf_function cpu_fprintf,
@@ -2225,3 +2208,11 @@ int page_unprotect(target_ulong address, uintptr_t pc)
    return 0;
 }
 #endif /* CONFIG_USER_ONLY */
+
+/* This is a wrapper for common code that can not use CONFIG_SOFTMMU */
+void tcg_flush_softmmu_tlb(CPUState *cs)
+{
+#ifdef CONFIG_SOFTMMU
+    tlb_flush(cs);
+#endif
+}
--- a/accel/tcg/translate-common.c
+++ b/accel/tcg/translate-common.c
@@ -1,56 +0,0 @@
-/*
- *  Host code generation common components
- *
- *  Copyright (c) 2015 Peter Crosthwaite <crosthwaite.peter@gmail.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "qemu/osdep.h"
-#include "qemu-common.h"
-#include "qom/cpu.h"
-#include "sysemu/cpus.h"
-#include "qemu/main-loop.h"
-
-uintptr_t qemu_real_host_page_size;
-intptr_t qemu_real_host_page_mask;
-
-#ifndef CONFIG_USER_ONLY
-/* mask must never be zero, except for A20 change call */
-static void tcg_handle_interrupt(CPUState *cpu, int mask)
-{
-    int old_mask;
-    g_assert(qemu_mutex_iothread_locked());
-
-    old_mask = cpu->interrupt_request;
-    cpu->interrupt_request |= mask;
-
-    /*
-     * If called from iothread context, wake the target cpu in
-     * case its halted.
-     */
-    if (!qemu_cpu_is_self(cpu)) {
-        qemu_cpu_kick(cpu);
-    } else {
-        cpu->icount_decr.u16.high = -1;
-        if (use_icount &&
-            !cpu->can_do_io
-            && (mask & ~old_mask) != 0) {
-            cpu_abort(cpu, "Raised interrupt while not in I/O function");
-        }
-    }
-}
-
-CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
-#endif
--- a/audio/rate_template.h
+++ b/audio/rate_template.h
@@ -71,6 +71,12 @@ void NAME (void *opaque, struct st_sample *ibuf, struct st_sample *obuf,
        while (rate->ipos <= (rate->opos >> 32)) {
            ilast = *ibuf++;
            rate->ipos++;
+
+            /* if ipos overflow, there is  a infinite loop */
+            if (rate->ipos == 0xffffffff) {
+                rate->ipos = 1;
+                rate->opos = rate->opos & 0xffffffff;
+            }
            /* See if we finished the input buffer yet */
            if (ibuf >= iend) {
                goto the_end;
--- a/backends/hostmem-ram.c
+++ b/backends/hostmem-ram.c
@@ -28,7 +28,7 @@ ram_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
    }

    path = object_get_canonical_path_component(OBJECT(backend));
-    memory_region_init_ram(&backend->mr, OBJECT(backend), path,
+    memory_region_init_ram_nomigrate(&backend->mr, OBJECT(backend), path,
                           backend->size, errp);
    g_free(path);
 }
--- a/backends/rng-egd.c
+++ b/backends/rng-egd.c
@@ -106,7 +106,7 @@ static void rng_egd_opened(RngBackend *b, Error **errp)

    /* FIXME we should resubmit pending requests when the CDS reconnects. */
    qemu_chr_fe_set_handlers(&s->chr, rng_egd_chr_can_read,
-                             rng_egd_chr_read, NULL, s, NULL, true);
+                             rng_egd_chr_read, NULL, NULL, s, NULL, true);
 }

 static void rng_egd_set_chardev(Object *obj, const char *value, Error **errp)
--- a/backends/trace-events
+++ b/backends/trace-events
--- a/block.c
+++ b/block.c
@@ -2185,6 +2185,7 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
        ret = -EINVAL;
        goto free_exit;
    }
+    bdrv_set_aio_context(backing_hd, bdrv_get_aio_context(bs));

    /* Hook up the backing file link; drop our reference, bs owns the
     * backing_hd reference now */
@@ -2573,15 +2574,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
        goto close_and_fail;
    }

-    if (!bdrv_key_required(bs)) {
-        bdrv_parent_cb_change_media(bs, true);
-    } else if (!runstate_check(RUN_STATE_PRELAUNCH)
-               && !runstate_check(RUN_STATE_INMIGRATE)
-               && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
-        error_setg(errp,
-                   "Guest must be stopped for opening of encrypted image");
-        goto close_and_fail;
-    }
+    bdrv_parent_cb_change_media(bs, true);

    QDECREF(options);

@@ -2989,24 +2982,45 @@ error:
 void bdrv_reopen_commit(BDRVReopenState *reopen_state)
 {
    BlockDriver *drv;
+    BlockDriverState *bs;
+    bool old_can_write, new_can_write;

    assert(reopen_state != NULL);
-    drv = reopen_state->bs->drv;
+    bs = reopen_state->bs;
+    drv = bs->drv;
    assert(drv != NULL);

+    old_can_write =
+        !bdrv_is_read_only(bs) && !(bdrv_get_flags(bs) & BDRV_O_INACTIVE);
+
    /* If there are any driver level actions to take */
    if (drv->bdrv_reopen_commit) {
        drv->bdrv_reopen_commit(reopen_state);
    }

    /* set BDS specific flags now */
-    QDECREF(reopen_state->bs->explicit_options);
+    QDECREF(bs->explicit_options);

-    reopen_state->bs->explicit_options   = reopen_state->explicit_options;
-    reopen_state->bs->open_flags         = reopen_state->flags;
-    reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
+    bs->explicit_options   = reopen_state->explicit_options;
+    bs->open_flags         = reopen_state->flags;
+    bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);

-    bdrv_refresh_limits(reopen_state->bs, NULL);
+    bdrv_refresh_limits(bs, NULL);
+
+    new_can_write =
+        !bdrv_is_read_only(bs) && !(bdrv_get_flags(bs) & BDRV_O_INACTIVE);
+    if (!old_can_write && new_can_write && drv->bdrv_reopen_bitmaps_rw) {
+        Error *local_err = NULL;
+        if (drv->bdrv_reopen_bitmaps_rw(bs, &local_err) < 0) {
+            /* This is not fatal, bitmaps just left read-only, so all following
+             * writes will fail. User can remove read-only bitmaps to unblock
+             * writes.
+             */
+            error_reportf_err(local_err,
+                              "%s: Failed to make dirty bitmaps writable: ",
+                              bdrv_get_node_name(bs));
+        }
+    }
 }

 /*
@@ -3040,9 +3054,6 @@ static void bdrv_close(BlockDriverState *bs)
    bdrv_flush(bs);
    bdrv_drain(bs); /* in case flush left pending I/O */

-    bdrv_release_named_dirty_bitmaps(bs);
-    assert(QLIST_EMPTY(&bs->dirty_bitmaps));
-
    if (bs->drv) {
        BdrvChild *child, *next;

@@ -3072,7 +3083,6 @@ static void bdrv_close(BlockDriverState *bs)
        bs->backing_format[0] = '\0';
        bs->total_sectors = 0;
        bs->encrypted = false;
-        bs->valid_key = false;
        bs->sg = false;
        QDECREF(bs->options);
        QDECREF(bs->explicit_options);
@@ -3081,6 +3091,9 @@ static void bdrv_close(BlockDriverState *bs)
        bs->full_open_options = NULL;
    }

+    bdrv_release_named_dirty_bitmaps(bs);
+    assert(QLIST_EMPTY(&bs->dirty_bitmaps));
+
    QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
        g_free(ban);
    }
@@ -3398,7 +3411,8 @@ exit:
 /**
 * Truncate file to 'offset' bytes (needed only for file protocols)
 */
-int bdrv_truncate(BdrvChild *child, int64_t offset, Error **errp)
+int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc,
+                  Error **errp)
 {
    BlockDriverState *bs = child->bs;
    BlockDriver *drv = bs->drv;
@@ -3421,7 +3435,7 @@ int bdrv_truncate(BdrvChild *child, int64_t offset, Error **errp)

    assert(!(bs->open_flags & BDRV_O_INACTIVE));

-    ret = drv->bdrv_truncate(bs, offset, errp);
+    ret = drv->bdrv_truncate(bs, offset, prealloc, errp);
    if (ret == 0) {
        ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
        bdrv_dirty_bitmap_truncate(bs);
@@ -3450,6 +3464,41 @@ int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
    return -ENOTSUP;
 }

+/*
+ * bdrv_measure:
+ * @drv: Format driver
+ * @opts: Creation options for new image
+ * @in_bs: Existing image containing data for new image (may be NULL)
+ * @errp: Error object
+ * Returns: A #BlockMeasureInfo (free using qapi_free_BlockMeasureInfo())
+ *          or NULL on error
+ *
+ * Calculate file size required to create a new image.
+ *
+ * If @in_bs is given then space for allocated clusters and zero clusters
+ * from that image are included in the calculation.  If @opts contains a
+ * backing file that is shared by @in_bs then backing clusters may be omitted
+ * from the calculation.
+ *
+ * If @in_bs is NULL then the calculation includes no allocated clusters
+ * unless a preallocation option is given in @opts.
+ *
+ * Note that @in_bs may use a different BlockDriver from @drv.
+ *
+ * If an error occurs the @errp pointer is set.
+ */
+BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts,
+                               BlockDriverState *in_bs, Error **errp)
+{
+    if (!drv->bdrv_measure) {
+        error_setg(errp, "Block driver '%s' does not support size measurement",
+                   drv->format_name);
+        return NULL;
+    }
+
+    return drv->bdrv_measure(opts, in_bs, errp);
+}
+
 /**
 * Return number of sectors on success, -errno on error.
 */
@@ -3502,72 +3551,6 @@ bool bdrv_is_encrypted(BlockDriverState *bs)
    return bs->encrypted;
 }

-bool bdrv_key_required(BlockDriverState *bs)
-{
-    BdrvChild *backing = bs->backing;
-
-    if (backing && backing->bs->encrypted && !backing->bs->valid_key) {
-        return true;
-    }
-    return (bs->encrypted && !bs->valid_key);
-}
-
-int bdrv_set_key(BlockDriverState *bs, const char *key)
-{
-    int ret;
-    if (bs->backing && bs->backing->bs->encrypted) {
-        ret = bdrv_set_key(bs->backing->bs, key);
-        if (ret < 0)
-            return ret;
-        if (!bs->encrypted)
-            return 0;
-    }
-    if (!bs->encrypted) {
-        return -EINVAL;
-    } else if (!bs->drv || !bs->drv->bdrv_set_key) {
-        return -ENOMEDIUM;
-    }
-    ret = bs->drv->bdrv_set_key(bs, key);
-    if (ret < 0) {
-        bs->valid_key = false;
-    } else if (!bs->valid_key) {
-        /* call the change callback now, we skipped it on open */
-        bs->valid_key = true;
-        bdrv_parent_cb_change_media(bs, true);
-    }
-    return ret;
-}
-
-/*
- * Provide an encryption key for @bs.
- * If @key is non-null:
- *     If @bs is not encrypted, fail.
- *     Else if the key is invalid, fail.
- *     Else set @bs's key to @key, replacing the existing key, if any.
- * If @key is null:
- *     If @bs is encrypted and still lacks a key, fail.
- *     Else do nothing.
- * On failure, store an error object through @errp if non-null.
- */
-void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
-{
-    if (key) {
-        if (!bdrv_is_encrypted(bs)) {
-            error_setg(errp, "Node '%s' is not encrypted",
-                      bdrv_get_device_or_node_name(bs));
-        } else if (bdrv_set_key(bs, key) < 0) {
-            error_setg(errp, QERR_INVALID_PASSWORD);
-        }
-    } else {
-        if (bdrv_key_required(bs)) {
-            error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
-                      "'%s' (%s) is encrypted",
-                      bdrv_get_device_or_node_name(bs),
-                      bdrv_get_encrypted_filename(bs));
-        }
-    }
-}
-
 const char *bdrv_get_format_name(BlockDriverState *bs)
 {
    return bs->drv ? bs->drv->format_name : NULL;
@@ -4135,6 +4118,10 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs,
        }
    }

+    /* At this point persistent bitmaps should be already stored by the format
+     * driver */
+    bdrv_release_persistent_dirty_bitmaps(bs);
+
    return 0;
 }

@@ -4267,11 +4254,9 @@ bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
    if (!QLIST_EMPTY(&bs->op_blockers[op])) {
        blocker = QLIST_FIRST(&bs->op_blockers[op]);
-        if (errp) {
-            *errp = error_copy(blocker->reason);
-            error_prepend(errp, "Node '%s' is busy: ",
-                          bdrv_get_device_or_node_name(bs));
-        }
+        error_propagate(errp, error_copy(blocker->reason));
+        error_prepend(errp, "Node '%s' is busy: ",
+                      bdrv_get_device_or_node_name(bs));
        return true;
    }
    return false;
@@ -4411,55 +4396,65 @@ void bdrv_img_create(const char *filename, const char *fmt,

    backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);

-    // The size for the image must always be specified, with one exception:
-    // If we are using a backing file, we can obtain the size from there
+    /* The size for the image must always be specified, unless we have a backing
+     * file and we have not been forbidden from opening it. */
    size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
-    if (size == -1) {
-        if (backing_file) {
-            BlockDriverState *bs;
-            char *full_backing = g_new0(char, PATH_MAX);
-            int64_t size;
-            int back_flags;
-            QDict *backing_options = NULL;
+    if (backing_file && !(flags & BDRV_O_NO_BACKING)) {
+        BlockDriverState *bs;
+        char *full_backing = g_new0(char, PATH_MAX);
+        int back_flags;
+        QDict *backing_options = NULL;

-            bdrv_get_full_backing_filename_from_filename(filename, backing_file,
-                                                         full_backing, PATH_MAX,
-                                                         &local_err);
-            if (local_err) {
-                g_free(full_backing);
-                goto out;
-            }
-
-            /* backing files always opened read-only */
-            back_flags = flags;
-            back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
-
-            if (backing_fmt) {
-                backing_options = qdict_new();
-                qdict_put_str(backing_options, "driver", backing_fmt);
-            }
-
-            bs = bdrv_open(full_backing, NULL, backing_options, back_flags,
-                           &local_err);
+        bdrv_get_full_backing_filename_from_filename(filename, backing_file,
+                                                     full_backing, PATH_MAX,
+                                                     &local_err);
+        if (local_err) {
            g_free(full_backing);
-            if (!bs) {
-                goto out;
-            }
-            size = bdrv_getlength(bs);
-            if (size < 0) {
-                error_setg_errno(errp, -size, "Could not get size of '%s'",
-                                 backing_file);
-                bdrv_unref(bs);
-                goto out;
-            }
-
-            qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
-
-            bdrv_unref(bs);
-        } else {
-            error_setg(errp, "Image creation needs a size parameter");
            goto out;
        }
+
+        /* backing files always opened read-only */
+        back_flags = flags;
+        back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
+
+        if (backing_fmt) {
+            backing_options = qdict_new();
+            qdict_put_str(backing_options, "driver", backing_fmt);
+        }
+
+        bs = bdrv_open(full_backing, NULL, backing_options, back_flags,
+                       &local_err);
+        g_free(full_backing);
+        if (!bs && size != -1) {
+            /* Couldn't open BS, but we have a size, so it's nonfatal */
+            warn_reportf_err(local_err,
+                            "Could not verify backing image. "
+                            "This may become an error in future versions.\n");
+            local_err = NULL;
+        } else if (!bs) {
+            /* Couldn't open bs, do not have size */
+            error_append_hint(&local_err,
+                              "Could not open backing image to determine size.\n");
+            goto out;
+        } else {
+            if (size == -1) {
+                /* Opened BS, have no size */
+                size = bdrv_getlength(bs);
+                if (size < 0) {
+                    error_setg_errno(errp, -size, "Could not get size of '%s'",
+                                     backing_file);
+                    bdrv_unref(bs);
+                    goto out;
+                }
+                qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
+            }
+            bdrv_unref(bs);
+        }
+    } /* (backing_file && !(flags & BDRV_O_NO_BACKING)) */
+
+    if (size == -1) {
+        error_setg(errp, "Image creation needs a size parameter");
+        goto out;
    }

    if (!quiet) {
@@ -4933,3 +4928,25 @@ void bdrv_del_child(BlockDriverState *parent_bs, BdrvChild *child, Error **errp)

    parent_bs->drv->bdrv_del_child(parent_bs, child, errp);
 }
+
+bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
+                                     uint32_t granularity, Error **errp)
+{
+    BlockDriver *drv = bs->drv;
+
+    if (!drv) {
+        error_setg_errno(errp, ENOMEDIUM,
+                         "Can't store persistent bitmaps to %s",
+                         bdrv_get_device_or_node_name(bs));
+        return false;
+    }
+
+    if (!drv->bdrv_can_store_new_dirty_bitmap) {
+        error_setg_errno(errp, ENOTSUP,
+                         "Can't store persistent bitmaps to %s",
+                         bdrv_get_device_or_node_name(bs));
+        return false;
+    }
+
+    return drv->bdrv_can_store_new_dirty_bitmap(bs, name, granularity, errp);
+}
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -1,6 +1,6 @@
 block-obj-y += raw-format.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o dmg.o
-block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
-block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
+block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o qcow2-bitmap.o
+block-obj-y += qed.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-y += qed-check.o
 block-obj-y += vhdx.o vhdx-endian.o vhdx-log.o
 block-obj-y += quorum.o
--- a/block/backup.c
+++ b/block/backup.c
@@ -39,7 +39,7 @@ typedef struct BackupBlockJob {
    BlockdevOnError on_source_error;
    BlockdevOnError on_target_error;
    CoRwlock flush_rwlock;
-    uint64_t sectors_read;
+    uint64_t bytes_read;
    unsigned long *done_bitmap;
    int64_t cluster_size;
    bool compress;
@@ -47,12 +47,6 @@ typedef struct BackupBlockJob {
    QLIST_HEAD(, CowRequest) inflight_reqs;
 } BackupBlockJob;

-/* Size of a cluster in sectors, instead of bytes. */
-static inline int64_t cluster_size_sectors(BackupBlockJob *job)
-{
-  return job->cluster_size / BDRV_SECTOR_SIZE;
-}
-
 /* See if in-flight requests overlap and wait for them to complete */
 static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job,
                                                       int64_t start,
@@ -64,7 +58,7 @@ static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job,
    do {
        retry = false;
        QLIST_FOREACH(req, &job->inflight_reqs, list) {
-            if (end > req->start && start < req->end) {
+            if (end > req->start_byte && start < req->end_byte) {
                qemu_co_queue_wait(&req->wait_queue, NULL);
                retry = true;
                break;
@@ -75,10 +69,10 @@ static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job,

 /* Keep track of an in-flight request */
 static void cow_request_begin(CowRequest *req, BackupBlockJob *job,
-                                     int64_t start, int64_t end)
+                              int64_t start, int64_t end)
 {
-    req->start = start;
-    req->end = end;
+    req->start_byte = start;
+    req->end_byte = end;
    qemu_co_queue_init(&req->wait_queue);
    QLIST_INSERT_HEAD(&job->inflight_reqs, req, list);
 }
@@ -91,7 +85,7 @@ static void cow_request_end(CowRequest *req)
 }

 static int coroutine_fn backup_do_cow(BackupBlockJob *job,
-                                      int64_t sector_num, int nb_sectors,
+                                      int64_t offset, uint64_t bytes,
                                      bool *error_is_read,
                                      bool is_write_notifier)
 {
@@ -101,41 +95,37 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
    QEMUIOVector bounce_qiov;
    void *bounce_buffer = NULL;
    int ret = 0;
-    int64_t sectors_per_cluster = cluster_size_sectors(job);
-    int64_t start, end;
-    int n;
+    int64_t start, end; /* bytes */
+    int n; /* bytes */

    qemu_co_rwlock_rdlock(&job->flush_rwlock);

-    start = sector_num / sectors_per_cluster;
-    end = DIV_ROUND_UP(sector_num + nb_sectors, sectors_per_cluster);
+    start = QEMU_ALIGN_DOWN(offset, job->cluster_size);
+    end = QEMU_ALIGN_UP(bytes + offset, job->cluster_size);

-    trace_backup_do_cow_enter(job, start, sector_num, nb_sectors);
+    trace_backup_do_cow_enter(job, start, offset, bytes);

    wait_for_overlapping_requests(job, start, end);
    cow_request_begin(&cow_request, job, start, end);

-    for (; start < end; start++) {
-        if (test_bit(start, job->done_bitmap)) {
+    for (; start < end; start += job->cluster_size) {
+        if (test_bit(start / job->cluster_size, job->done_bitmap)) {
            trace_backup_do_cow_skip(job, start);
            continue; /* already copied */
        }

        trace_backup_do_cow_process(job, start);

-        n = MIN(sectors_per_cluster,
-                job->common.len / BDRV_SECTOR_SIZE -
-                start * sectors_per_cluster);
+        n = MIN(job->cluster_size, job->common.len - start);

        if (!bounce_buffer) {
            bounce_buffer = blk_blockalign(blk, job->cluster_size);
        }
        iov.iov_base = bounce_buffer;
-        iov.iov_len = n * BDRV_SECTOR_SIZE;
+        iov.iov_len = n;
        qemu_iovec_init_external(&bounce_qiov, &iov, 1);

-        ret = blk_co_preadv(blk, start * job->cluster_size,
-                            bounce_qiov.size, &bounce_qiov,
+        ret = blk_co_preadv(blk, start, bounce_qiov.size, &bounce_qiov,
                            is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
        if (ret < 0) {
            trace_backup_do_cow_read_fail(job, start, ret);
@@ -146,10 +136,10 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
        }

        if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
-            ret = blk_co_pwrite_zeroes(job->target, start * job->cluster_size,
+            ret = blk_co_pwrite_zeroes(job->target, start,
                                       bounce_qiov.size, BDRV_REQ_MAY_UNMAP);
        } else {
-            ret = blk_co_pwritev(job->target, start * job->cluster_size,
+            ret = blk_co_pwritev(job->target, start,
                                 bounce_qiov.size, &bounce_qiov,
                                 job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
        }
@@ -161,13 +151,13 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
            goto out;
        }

-        set_bit(start, job->done_bitmap);
+        set_bit(start / job->cluster_size, job->done_bitmap);

        /* Publish progress, guest I/O counts as progress too.  Note that the
         * offset field is an opaque progress value, it is not a disk offset.
         */
-        job->sectors_read += n;
-        job->common.offset += n * BDRV_SECTOR_SIZE;
+        job->bytes_read += n;
+        job->common.offset += n;
    }

 out:
@@ -177,7 +167,7 @@ out:

    cow_request_end(&cow_request);

-    trace_backup_do_cow_return(job, sector_num, nb_sectors, ret);
+    trace_backup_do_cow_return(job, offset, bytes, ret);

    qemu_co_rwlock_unlock(&job->flush_rwlock);

@@ -190,14 +180,12 @@ static int coroutine_fn backup_before_write_notify(
 {
    BackupBlockJob *job = container_of(notifier, BackupBlockJob, before_write);
    BdrvTrackedRequest *req = opaque;
-    int64_t sector_num = req->offset >> BDRV_SECTOR_BITS;
-    int nb_sectors = req->bytes >> BDRV_SECTOR_BITS;

    assert(req->bs == blk_bs(job->common.blk));
-    assert((req->offset & (BDRV_SECTOR_SIZE - 1)) == 0);
-    assert((req->bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+    assert(QEMU_IS_ALIGNED(req->offset, BDRV_SECTOR_SIZE));
+    assert(QEMU_IS_ALIGNED(req->bytes, BDRV_SECTOR_SIZE));

-    return backup_do_cow(job, sector_num, nb_sectors, NULL, true);
+    return backup_do_cow(job, req->offset, req->bytes, NULL, true);
 }

 static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp)
@@ -208,7 +196,7 @@ static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp)
        error_setg(errp, QERR_INVALID_PARAMETER, "speed");
        return;
    }
-    ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
+    ratelimit_set_speed(&s->limit, speed, SLICE_TIME);
 }

 static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
@@ -275,32 +263,29 @@ void backup_do_checkpoint(BlockJob *job, Error **errp)
    bitmap_zero(backup_job->done_bitmap, len);
 }

-void backup_wait_for_overlapping_requests(BlockJob *job, int64_t sector_num,
-                                          int nb_sectors)
+void backup_wait_for_overlapping_requests(BlockJob *job, int64_t offset,
+                                          uint64_t bytes)
 {
    BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);
-    int64_t sectors_per_cluster = cluster_size_sectors(backup_job);
    int64_t start, end;

    assert(job->driver->job_type == BLOCK_JOB_TYPE_BACKUP);

-    start = sector_num / sectors_per_cluster;
-    end = DIV_ROUND_UP(sector_num + nb_sectors, sectors_per_cluster);
+    start = QEMU_ALIGN_DOWN(offset, backup_job->cluster_size);
+    end = QEMU_ALIGN_UP(offset + bytes, backup_job->cluster_size);
    wait_for_overlapping_requests(backup_job, start, end);
 }

 void backup_cow_request_begin(CowRequest *req, BlockJob *job,
-                              int64_t sector_num,
-                              int nb_sectors)
+                              int64_t offset, uint64_t bytes)
 {
    BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);
-    int64_t sectors_per_cluster = cluster_size_sectors(backup_job);
    int64_t start, end;

    assert(job->driver->job_type == BLOCK_JOB_TYPE_BACKUP);

-    start = sector_num / sectors_per_cluster;
-    end = DIV_ROUND_UP(sector_num + nb_sectors, sectors_per_cluster);
+    start = QEMU_ALIGN_DOWN(offset, backup_job->cluster_size);
+    end = QEMU_ALIGN_UP(offset + bytes, backup_job->cluster_size);
    cow_request_begin(req, backup_job, start, end);
 }

@@ -359,8 +344,8 @@ static bool coroutine_fn yield_and_check(BackupBlockJob *job)
     */
    if (job->common.speed) {
        uint64_t delay_ns = ratelimit_calculate_delay(&job->limit,
-                                                      job->sectors_read);
-        job->sectors_read = 0;
+                                                      job->bytes_read);
+        job->bytes_read = 0;
        block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, delay_ns);
    } else {
        block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, 0);
@@ -379,11 +364,10 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
    int ret = 0;
    int clusters_per_iter;
    uint32_t granularity;
-    int64_t sector;
+    int64_t offset;
    int64_t cluster;
    int64_t end;
    int64_t last_cluster = -1;
-    int64_t sectors_per_cluster = cluster_size_sectors(job);
    BdrvDirtyBitmapIter *dbi;

    granularity = bdrv_dirty_bitmap_granularity(job->sync_bitmap);
@@ -391,8 +375,8 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
    dbi = bdrv_dirty_iter_new(job->sync_bitmap, 0);

    /* Find the next dirty sector(s) */
-    while ((sector = bdrv_dirty_iter_next(dbi)) != -1) {
-        cluster = sector / sectors_per_cluster;
+    while ((offset = bdrv_dirty_iter_next(dbi) * BDRV_SECTOR_SIZE) >= 0) {
+        cluster = offset / job->cluster_size;

        /* Fake progress updates for any clusters we skipped */
        if (cluster != last_cluster + 1) {
@@ -405,8 +389,8 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
                if (yield_and_check(job)) {
                    goto out;
                }
-                ret = backup_do_cow(job, cluster * sectors_per_cluster,
-                                    sectors_per_cluster, &error_is_read,
+                ret = backup_do_cow(job, cluster * job->cluster_size,
+                                    job->cluster_size, &error_is_read,
                                    false);
                if ((ret < 0) &&
                    backup_error_action(job, error_is_read, -ret) ==
@@ -419,7 +403,8 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
        /* If the bitmap granularity is smaller than the backup granularity,
         * we need to advance the iterator pointer to the next cluster. */
        if (granularity < job->cluster_size) {
-            bdrv_set_dirty_iter(dbi, cluster * sectors_per_cluster);
+            bdrv_set_dirty_iter(dbi,
+                                cluster * job->cluster_size / BDRV_SECTOR_SIZE);
        }

        last_cluster = cluster - 1;
@@ -441,17 +426,14 @@ static void coroutine_fn backup_run(void *opaque)
    BackupBlockJob *job = opaque;
    BackupCompleteData *data;
    BlockDriverState *bs = blk_bs(job->common.blk);
-    int64_t start, end;
-    int64_t sectors_per_cluster = cluster_size_sectors(job);
+    int64_t offset;
    int ret = 0;

    QLIST_INIT(&job->inflight_reqs);
    qemu_co_rwlock_init(&job->flush_rwlock);

-    start = 0;
-    end = DIV_ROUND_UP(job->common.len, job->cluster_size);
-
-    job->done_bitmap = bitmap_new(end);
+    job->done_bitmap = bitmap_new(DIV_ROUND_UP(job->common.len,
+                                               job->cluster_size));

    job->before_write.notify = backup_before_write_notify;
    bdrv_add_before_write_notifier(bs, &job->before_write);
@@ -466,7 +448,8 @@ static void coroutine_fn backup_run(void *opaque)
        ret = backup_run_incremental(job);
    } else {
        /* Both FULL and TOP SYNC_MODE's require copying.. */
-        for (; start < end; start++) {
+        for (offset = 0; offset < job->common.len;
+             offset += job->cluster_size) {
            bool error_is_read;
            int alloced = 0;

@@ -475,12 +458,13 @@ static void coroutine_fn backup_run(void *opaque)
            }

            if (job->sync_mode == MIRROR_SYNC_MODE_TOP) {
-                int i, n;
+                int i;
+                int64_t n;

                /* Check to see if these blocks are already in the
                 * backing file. */

-                for (i = 0; i < sectors_per_cluster;) {
+                for (i = 0; i < job->cluster_size;) {
                    /* bdrv_is_allocated() only returns true/false based
                     * on the first set of sectors it comes across that
                     * are are all in the same state.
@@ -488,9 +472,8 @@ static void coroutine_fn backup_run(void *opaque)
                     * backup cluster length.  We end up copying more than
                     * needed but at some point that is always the case. */
                    alloced =
-                        bdrv_is_allocated(bs,
-                                start * sectors_per_cluster + i,
-                                sectors_per_cluster - i, &n);
+                        bdrv_is_allocated(bs, offset + i,
+                                          job->cluster_size - i, &n);
                    i += n;

                    if (alloced || n == 0) {
@@ -508,9 +491,8 @@ static void coroutine_fn backup_run(void *opaque)
            if (alloced < 0) {
                ret = alloced;
            } else {
-                ret = backup_do_cow(job, start * sectors_per_cluster,
-                                    sectors_per_cluster, &error_is_read,
-                                    false);
+                ret = backup_do_cow(job, offset, job->cluster_size,
+                                    &error_is_read, false);
            }
            if (ret < 0) {
                /* Depending on error action, fail now or retry cluster */
@@ -519,7 +501,7 @@ static void coroutine_fn backup_run(void *opaque)
                if (action == BLOCK_ERROR_ACTION_REPORT) {
                    break;
                } else {
-                    start--;
+                    offset -= job->cluster_size;
                    continue;
                }
            }
@@ -657,12 +639,12 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
    ret = bdrv_get_info(target, &bdi);
    if (ret == -ENOTSUP && !target->backing) {
        /* Cluster size is not defined */
-        error_report("WARNING: The target block device doesn't provide "
-                     "information about the block size and it doesn't have a "
-                     "backing file. The default block size of %u bytes is "
-                     "used. If the actual block size of the target exceeds "
-                     "this default, the backup may be unusable",
-                     BACKUP_CLUSTER_SIZE_DEFAULT);
+        warn_report("The target block device doesn't provide "
+                    "information about the block size and it doesn't have a "
+                    "backing file. The default block size of %u bytes is "
+                    "used. If the actual block size of the target exceeds "
+                    "this default, the backup may be unusable",
+                    BACKUP_CLUSTER_SIZE_DEFAULT);
        job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
    } else if (ret < 0 && !target->backing) {
        error_setg_errno(errp, -ret,
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -575,7 +575,7 @@ static int blkdebug_co_flush(BlockDriverState *bs)
 }

 static int coroutine_fn blkdebug_co_pwrite_zeroes(BlockDriverState *bs,
-                                                  int64_t offset, int count,
+                                                  int64_t offset, int bytes,
                                                  BdrvRequestFlags flags)
 {
    uint32_t align = MAX(bs->bl.request_alignment,
@@ -586,29 +586,29 @@ static int coroutine_fn blkdebug_co_pwrite_zeroes(BlockDriverState *bs,
     * preferred alignment (so that we test the fallback to writes on
     * unaligned portions), and check that the block layer never hands
     * us anything unaligned that crosses an alignment boundary.  */
-    if (count < align) {
+    if (bytes < align) {
        assert(QEMU_IS_ALIGNED(offset, align) ||
-               QEMU_IS_ALIGNED(offset + count, align) ||
+               QEMU_IS_ALIGNED(offset + bytes, align) ||
               DIV_ROUND_UP(offset, align) ==
-               DIV_ROUND_UP(offset + count, align));
+               DIV_ROUND_UP(offset + bytes, align));
        return -ENOTSUP;
    }
    assert(QEMU_IS_ALIGNED(offset, align));
-    assert(QEMU_IS_ALIGNED(count, align));
+    assert(QEMU_IS_ALIGNED(bytes, align));
    if (bs->bl.max_pwrite_zeroes) {
-        assert(count <= bs->bl.max_pwrite_zeroes);
+        assert(bytes <= bs->bl.max_pwrite_zeroes);
    }

-    err = rule_check(bs, offset, count);
+    err = rule_check(bs, offset, bytes);
    if (err) {
        return err;
    }

-    return bdrv_co_pwrite_zeroes(bs->file, offset, count, flags);
+    return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
 }

 static int coroutine_fn blkdebug_co_pdiscard(BlockDriverState *bs,
-                                             int64_t offset, int count)
+                                             int64_t offset, int bytes)
 {
    uint32_t align = bs->bl.pdiscard_alignment;
    int err;
@@ -616,29 +616,39 @@ static int coroutine_fn blkdebug_co_pdiscard(BlockDriverState *bs,
    /* Only pass through requests that are larger than requested
     * minimum alignment, and ensure that unaligned requests do not
     * cross optimum discard boundaries. */
-    if (count < bs->bl.request_alignment) {
+    if (bytes < bs->bl.request_alignment) {
        assert(QEMU_IS_ALIGNED(offset, align) ||
-               QEMU_IS_ALIGNED(offset + count, align) ||
+               QEMU_IS_ALIGNED(offset + bytes, align) ||
               DIV_ROUND_UP(offset, align) ==
-               DIV_ROUND_UP(offset + count, align));
+               DIV_ROUND_UP(offset + bytes, align));
        return -ENOTSUP;
    }
    assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
-    assert(QEMU_IS_ALIGNED(count, bs->bl.request_alignment));
-    if (align && count >= align) {
+    assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
+    if (align && bytes >= align) {
        assert(QEMU_IS_ALIGNED(offset, align));
-        assert(QEMU_IS_ALIGNED(count, align));
+        assert(QEMU_IS_ALIGNED(bytes, align));
    }
    if (bs->bl.max_pdiscard) {
-        assert(count <= bs->bl.max_pdiscard);
+        assert(bytes <= bs->bl.max_pdiscard);
    }

-    err = rule_check(bs, offset, count);
+    err = rule_check(bs, offset, bytes);
    if (err) {
        return err;
    }

-    return bdrv_co_pdiscard(bs->file->bs, offset, count);
+    return bdrv_co_pdiscard(bs->file->bs, offset, bytes);
+}
+
+static int64_t coroutine_fn blkdebug_co_get_block_status(
+    BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum,
+    BlockDriverState **file)
+{
+    *pnum = nb_sectors;
+    *file = bs->file->bs;
+    return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID |
+        (sector_num << BDRV_SECTOR_BITS);
 }

 static void blkdebug_close(BlockDriverState *bs)
@@ -811,9 +821,10 @@ static int64_t blkdebug_getlength(BlockDriverState *bs)
    return bdrv_getlength(bs->file->bs);
 }

-static int blkdebug_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
+static int blkdebug_truncate(BlockDriverState *bs, int64_t offset,
+                             PreallocMode prealloc, Error **errp)
 {
-    return bdrv_truncate(bs->file, offset, errp);
+    return bdrv_truncate(bs->file, offset, prealloc, errp);
 }

 static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options)
@@ -839,9 +850,13 @@ static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options)
    }

    if (!force_json && bs->file->bs->exact_filename[0]) {
-        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "blkdebug:%s:%s", s->config_file ?: "",
-                 bs->file->bs->exact_filename);
+        int ret = snprintf(bs->exact_filename, sizeof(bs->exact_filename),
+                           "blkdebug:%s:%s", s->config_file ?: "",
+                           bs->file->bs->exact_filename);
+        if (ret >= sizeof(bs->exact_filename)) {
+            /* An overflow makes the filename unusable, so do not report any */
+            bs->exact_filename[0] = 0;
+        }
    }

    opts = qdict_new();
@@ -911,6 +926,7 @@ static BlockDriver bdrv_blkdebug = {
    .bdrv_co_flush_to_disk  = blkdebug_co_flush,
    .bdrv_co_pwrite_zeroes  = blkdebug_co_pwrite_zeroes,
    .bdrv_co_pdiscard       = blkdebug_co_pdiscard,
+    .bdrv_co_get_block_status = blkdebug_co_get_block_status,

    .bdrv_debug_event           = blkdebug_debug_event,
    .bdrv_debug_breakpoint      = blkdebug_debug_breakpoint,
--- a/block/blkreplay.c
+++ b/block/blkreplay.c
@@ -96,10 +96,10 @@ static int coroutine_fn blkreplay_co_pwritev(BlockDriverState *bs,
 }

 static int coroutine_fn blkreplay_co_pwrite_zeroes(BlockDriverState *bs,
-    int64_t offset, int count, BdrvRequestFlags flags)
+    int64_t offset, int bytes, BdrvRequestFlags flags)
 {
    uint64_t reqid = blkreplay_next_id();
-    int ret = bdrv_co_pwrite_zeroes(bs->file, offset, count, flags);
+    int ret = bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
    block_request_create(reqid, bs, qemu_coroutine_self());
    qemu_coroutine_yield();

@@ -107,10 +107,10 @@ static int coroutine_fn blkreplay_co_pwrite_zeroes(BlockDriverState *bs,
 }

 static int coroutine_fn blkreplay_co_pdiscard(BlockDriverState *bs,
-                                              int64_t offset, int count)
+                                              int64_t offset, int bytes)
 {
    uint64_t reqid = blkreplay_next_id();
-    int ret = bdrv_co_pdiscard(bs->file->bs, offset, count);
+    int ret = bdrv_co_pdiscard(bs->file->bs, offset, bytes);
    block_request_create(reqid, bs, qemu_coroutine_self());
    qemu_coroutine_yield();

--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -301,10 +301,14 @@ static void blkverify_refresh_filename(BlockDriverState *bs, QDict *options)
    if (bs->file->bs->exact_filename[0]
        && s->test_file->bs->exact_filename[0])
    {
-        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "blkverify:%s:%s",
-                 bs->file->bs->exact_filename,
-                 s->test_file->bs->exact_filename);
+        int ret = snprintf(bs->exact_filename, sizeof(bs->exact_filename),
+                           "blkverify:%s:%s",
+                           bs->file->bs->exact_filename,
+                           s->test_file->bs->exact_filename);
+        if (ret >= sizeof(bs->exact_filename)) {
+            /* An overflow makes the filename unusable, so do not report any */
+            bs->exact_filename[0] = 0;
+        }
    }
 }

--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -83,7 +83,6 @@ static const AIOCBInfo block_backend_aiocb_info = {

 static void drive_info_del(DriveInfo *dinfo);
 static BlockBackend *bdrv_first_blk(BlockDriverState *bs);
-static char *blk_get_attached_dev_id(BlockBackend *blk);

 /* All BlockBackends */
 static QTAILQ_HEAD(, BlockBackend) block_backends =
@@ -343,7 +342,7 @@ void blk_unref(BlockBackend *blk)
 * Behaves similarly to blk_next() but iterates over all BlockBackends, even the
 * ones which are hidden (i.e. are not referenced by the monitor).
 */
-static BlockBackend *blk_all_next(BlockBackend *blk)
+BlockBackend *blk_all_next(BlockBackend *blk)
 {
    return blk ? QTAILQ_NEXT(blk, link)
               : QTAILQ_FIRST(&block_backends);
@@ -726,7 +725,7 @@ void *blk_get_attached_dev(BlockBackend *blk)

 /* Return the qdev ID, or if no ID is assigned the QOM path, of the block
 * device attached to the BlockBackend. */
-static char *blk_get_attached_dev_id(BlockBackend *blk)
+char *blk_get_attached_dev_id(BlockBackend *blk)
 {
    DeviceState *dev;

@@ -1099,9 +1098,9 @@ int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
 }

 int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
-                      int count, BdrvRequestFlags flags)
+                      int bytes, BdrvRequestFlags flags)
 {
-    return blk_prw(blk, offset, NULL, count, blk_write_entry,
+    return blk_prw(blk, offset, NULL, bytes, blk_write_entry,
                   flags | BDRV_REQ_ZERO_WRITE);
 }

@@ -1311,10 +1310,10 @@ static void blk_aio_pdiscard_entry(void *opaque)
 }

 BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk,
-                             int64_t offset, int count,
+                             int64_t offset, int bytes,
                             BlockCompletionFunc *cb, void *opaque)
 {
-    return blk_aio_prwv(blk, offset, count, NULL, blk_aio_pdiscard_entry, 0,
+    return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0,
                        cb, opaque);
 }

@@ -1374,14 +1373,14 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
    return blk_aio_prwv(blk, req, 0, &qiov, blk_aio_ioctl_entry, 0, cb, opaque);
 }

-int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int count)
+int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
 {
-    int ret = blk_check_byte_request(blk, offset, count);
+    int ret = blk_check_byte_request(blk, offset, bytes);
    if (ret < 0) {
        return ret;
    }

-    return bdrv_co_pdiscard(blk_bs(blk), offset, count);
+    return bdrv_co_pdiscard(blk_bs(blk), offset, bytes);
 }

 int blk_co_flush(BlockBackend *blk)
@@ -1760,9 +1759,9 @@ void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
 }

 int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
-                                      int count, BdrvRequestFlags flags)
+                                      int bytes, BdrvRequestFlags flags)
 {
-    return blk_co_pwritev(blk, offset, count, NULL,
+    return blk_co_pwritev(blk, offset, bytes, NULL,
                          flags | BDRV_REQ_ZERO_WRITE);
 }

@@ -1773,14 +1772,15 @@ int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
                   BDRV_REQ_WRITE_COMPRESSED);
 }

-int blk_truncate(BlockBackend *blk, int64_t offset, Error **errp)
+int blk_truncate(BlockBackend *blk, int64_t offset, PreallocMode prealloc,
+                 Error **errp)
 {
    if (!blk_is_available(blk)) {
        error_setg(errp, "No medium inserted");
        return -ENOMEDIUM;
    }

-    return bdrv_truncate(blk->root, offset, errp);
+    return bdrv_truncate(blk->root, offset, prealloc, errp);
 }

 static void blk_pdiscard_entry(void *opaque)
@@ -1789,9 +1789,9 @@ static void blk_pdiscard_entry(void *opaque)
    rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, rwco->qiov->size);
 }

-int blk_pdiscard(BlockBackend *blk, int64_t offset, int count)
+int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
 {
-    return blk_prw(blk, offset, NULL, count, blk_pdiscard_entry, 0);
+    return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0);
 }

 int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
--- a/block/commit.c
+++ b/block/commit.c
@@ -47,26 +47,25 @@ typedef struct CommitBlockJob {
 } CommitBlockJob;

 static int coroutine_fn commit_populate(BlockBackend *bs, BlockBackend *base,
-                                        int64_t sector_num, int nb_sectors,
+                                        int64_t offset, uint64_t bytes,
                                        void *buf)
 {
    int ret = 0;
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base = buf,
-        .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
+        .iov_len = bytes,
    };

+    assert(bytes < SIZE_MAX);
    qemu_iovec_init_external(&qiov, &iov, 1);

-    ret = blk_co_preadv(bs, sector_num * BDRV_SECTOR_SIZE,
-                        qiov.size, &qiov, 0);
+    ret = blk_co_preadv(bs, offset, qiov.size, &qiov, 0);
    if (ret < 0) {
        return ret;
    }

-    ret = blk_co_pwritev(base, sector_num * BDRV_SECTOR_SIZE,
-                         qiov.size, &qiov, 0);
+    ret = blk_co_pwritev(base, offset, qiov.size, &qiov, 0);
    if (ret < 0) {
        return ret;
    }
@@ -91,7 +90,9 @@ static void commit_complete(BlockJob *job, void *opaque)

    /* Make sure overlay_bs and top stay around until bdrv_set_backing_hd() */
    bdrv_ref(top);
-    bdrv_ref(overlay_bs);
+    if (overlay_bs) {
+        bdrv_ref(overlay_bs);
+    }

    /* Remove base node parent that still uses BLK_PERM_WRITE/RESIZE before
     * the normal backing chain can be restored. */
@@ -119,6 +120,13 @@ static void commit_complete(BlockJob *job, void *opaque)
    }
    g_free(s->backing_file_str);
    blk_unref(s->top);
+
+    /* If there is more than one reference to the job (e.g. if called from
+     * block_job_finish_sync()), block_job_completed() won't free it and
+     * therefore the blockers on the intermediate nodes remain. This would
+     * cause bdrv_set_backing_hd() to fail. */
+    block_job_remove_all_bdrv(job);
+
    block_job_completed(&s->common, ret);
    g_free(data);

@@ -137,17 +145,16 @@ static void coroutine_fn commit_run(void *opaque)
 {
    CommitBlockJob *s = opaque;
    CommitCompleteData *data;
-    int64_t sector_num, end;
+    int64_t offset;
    uint64_t delay_ns = 0;
    int ret = 0;
-    int n = 0;
+    int64_t n = 0; /* bytes */
    void *buf = NULL;
    int bytes_written = 0;
    int64_t base_len;

    ret = s->common.len = blk_getlength(s->top);

-
    if (s->common.len < 0) {
        goto out;
    }
@@ -158,16 +165,15 @@ static void coroutine_fn commit_run(void *opaque)
    }

    if (base_len < s->common.len) {
-        ret = blk_truncate(s->base, s->common.len, NULL);
+        ret = blk_truncate(s->base, s->common.len, PREALLOC_MODE_OFF, NULL);
        if (ret) {
            goto out;
        }
    }

-    end = s->common.len >> BDRV_SECTOR_BITS;
    buf = blk_blockalign(s->top, COMMIT_BUFFER_SIZE);

-    for (sector_num = 0; sector_num < end; sector_num += n) {
+    for (offset = 0; offset < s->common.len; offset += n) {
        bool copy;

        /* Note that even when no rate limit is applied we need to yield
@@ -179,14 +185,12 @@ static void coroutine_fn commit_run(void *opaque)
        }
        /* Copy if allocated above the base */
        ret = bdrv_is_allocated_above(blk_bs(s->top), blk_bs(s->base),
-                                      sector_num,
-                                      COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE,
-                                      &n);
+                                      offset, COMMIT_BUFFER_SIZE, &n);
        copy = (ret == 1);
-        trace_commit_one_iteration(s, sector_num, n, ret);
+        trace_commit_one_iteration(s, offset, n, ret);
        if (copy) {
-            ret = commit_populate(s->top, s->base, sector_num, n, buf);
-            bytes_written += n * BDRV_SECTOR_SIZE;
+            ret = commit_populate(s->top, s->base, offset, n, buf);
+            bytes_written += n;
        }
        if (ret < 0) {
            BlockErrorAction action =
@@ -199,7 +203,7 @@ static void coroutine_fn commit_run(void *opaque)
            }
        }
        /* Publish progress */
-        s->common.offset += n * BDRV_SECTOR_SIZE;
+        s->common.offset += n;

        if (copy && s->common.speed) {
            delay_ns = ratelimit_calculate_delay(&s->limit, n);
@@ -224,7 +228,7 @@ static void commit_set_speed(BlockJob *job, int64_t speed, Error **errp)
        error_setg(errp, QERR_INVALID_PARAMETER, "speed");
        return;
    }
-    ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
+    ratelimit_set_speed(&s->limit, speed, SLICE_TIME);
 }

 static const BlockJobDriver commit_job_driver = {
@@ -246,7 +250,7 @@ static int64_t coroutine_fn bdrv_commit_top_get_block_status(
 {
    *pnum = nb_sectors;
    *file = bs->backing->bs;
-    return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
+    return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID |
           (sector_num << BDRV_SECTOR_BITS);
 }

@@ -437,7 +441,7 @@ fail:
 }


-#define COMMIT_BUF_SECTORS 2048
+#define COMMIT_BUF_SIZE (2048 * BDRV_SECTOR_SIZE)

 /* commit COW file into the raw image */
 int bdrv_commit(BlockDriverState *bs)
@@ -446,8 +450,9 @@ int bdrv_commit(BlockDriverState *bs)
    BlockDriverState *backing_file_bs = NULL;
    BlockDriverState *commit_top_bs = NULL;
    BlockDriver *drv = bs->drv;
-    int64_t sector, total_sectors, length, backing_length;
-    int n, ro, open_flags;
+    int64_t offset, length, backing_length;
+    int ro, open_flags;
+    int64_t n;
    int ret = 0;
    uint8_t *buf = NULL;
    Error *local_err = NULL;
@@ -518,37 +523,33 @@ int bdrv_commit(BlockDriverState *bs)
     * grow the backing file image if possible.  If not possible,
     * we must return an error */
    if (length > backing_length) {
-        ret = blk_truncate(backing, length, &local_err);
+        ret = blk_truncate(backing, length, PREALLOC_MODE_OFF, &local_err);
        if (ret < 0) {
            error_report_err(local_err);
            goto ro_cleanup;
        }
    }

-    total_sectors = length >> BDRV_SECTOR_BITS;
-
    /* blk_try_blockalign() for src will choose an alignment that works for
     * backing as well, so no need to compare the alignment manually. */
-    buf = blk_try_blockalign(src, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
+    buf = blk_try_blockalign(src, COMMIT_BUF_SIZE);
    if (buf == NULL) {
        ret = -ENOMEM;
        goto ro_cleanup;
    }

-    for (sector = 0; sector < total_sectors; sector += n) {
-        ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
+    for (offset = 0; offset < length; offset += n) {
+        ret = bdrv_is_allocated(bs, offset, COMMIT_BUF_SIZE, &n);
        if (ret < 0) {
            goto ro_cleanup;
        }
        if (ret) {
-            ret = blk_pread(src, sector * BDRV_SECTOR_SIZE, buf,
-                            n * BDRV_SECTOR_SIZE);
+            ret = blk_pread(src, offset, buf, n);
            if (ret < 0) {
                goto ro_cleanup;
            }

-            ret = blk_pwrite(backing, sector * BDRV_SECTOR_SIZE, buf,
-                             n * BDRV_SECTOR_SIZE, 0);
+            ret = blk_pwrite(backing, offset, buf, n, 0);
            if (ret < 0) {
                goto ro_cleanup;
            }
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -24,16 +24,10 @@
 #include "sysemu/block-backend.h"
 #include "crypto/block.h"
 #include "qapi/opts-visitor.h"
+#include "qapi/qobject-input-visitor.h"
 #include "qapi-visit.h"
 #include "qapi/error.h"
-
-#define BLOCK_CRYPTO_OPT_LUKS_KEY_SECRET "key-secret"
-#define BLOCK_CRYPTO_OPT_LUKS_CIPHER_ALG "cipher-alg"
-#define BLOCK_CRYPTO_OPT_LUKS_CIPHER_MODE "cipher-mode"
-#define BLOCK_CRYPTO_OPT_LUKS_IVGEN_ALG "ivgen-alg"
-#define BLOCK_CRYPTO_OPT_LUKS_IVGEN_HASH_ALG "ivgen-hash-alg"
-#define BLOCK_CRYPTO_OPT_LUKS_HASH_ALG "hash-alg"
-#define BLOCK_CRYPTO_OPT_LUKS_ITER_TIME "iter-time"
+#include "block/crypto.h"

 typedef struct BlockCrypto BlockCrypto;

@@ -135,11 +129,7 @@ static QemuOptsList block_crypto_runtime_opts_luks = {
    .name = "crypto",
    .head = QTAILQ_HEAD_INITIALIZER(block_crypto_runtime_opts_luks.head),
    .desc = {
-        {
-            .name = BLOCK_CRYPTO_OPT_LUKS_KEY_SECRET,
-            .type = QEMU_OPT_STRING,
-            .help = "ID of the secret that provides the encryption key",
-        },
+        BLOCK_CRYPTO_OPT_DEF_LUKS_KEY_SECRET(""),
        { /* end of list */ }
    },
 };
@@ -154,49 +144,21 @@ static QemuOptsList block_crypto_create_opts_luks = {
            .type = QEMU_OPT_SIZE,
            .help = "Virtual disk size"
        },
-        {
-            .name = BLOCK_CRYPTO_OPT_LUKS_KEY_SECRET,
-            .type = QEMU_OPT_STRING,
-            .help = "ID of the secret that provides the encryption key",
-        },
-        {
-            .name = BLOCK_CRYPTO_OPT_LUKS_CIPHER_ALG,
-            .type = QEMU_OPT_STRING,
-            .help = "Name of encryption cipher algorithm",
-        },
-        {
-            .name = BLOCK_CRYPTO_OPT_LUKS_CIPHER_MODE,
-            .type = QEMU_OPT_STRING,
-            .help = "Name of encryption cipher mode",
-        },
-        {
-            .name = BLOCK_CRYPTO_OPT_LUKS_IVGEN_ALG,
-            .type = QEMU_OPT_STRING,
-            .help = "Name of IV generator algorithm",
-        },
-        {
-            .name = BLOCK_CRYPTO_OPT_LUKS_IVGEN_HASH_ALG,
-            .type = QEMU_OPT_STRING,
-            .help = "Name of IV generator hash algorithm",
-        },
-        {
-            .name = BLOCK_CRYPTO_OPT_LUKS_HASH_ALG,
-            .type = QEMU_OPT_STRING,
-            .help = "Name of encryption hash algorithm",
-        },
-        {
-            .name = BLOCK_CRYPTO_OPT_LUKS_ITER_TIME,
-            .type = QEMU_OPT_NUMBER,
-            .help = "Time to spend in PBKDF in milliseconds",
-        },
+        BLOCK_CRYPTO_OPT_DEF_LUKS_KEY_SECRET(""),
+        BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_ALG(""),
+        BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_MODE(""),
+        BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_ALG(""),
+        BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_HASH_ALG(""),
+        BLOCK_CRYPTO_OPT_DEF_LUKS_HASH_ALG(""),
+        BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME(""),
        { /* end of list */ }
    },
 };


-static QCryptoBlockOpenOptions *
+QCryptoBlockOpenOptions *
 block_crypto_open_opts_init(QCryptoBlockFormat format,
-                            QemuOpts *opts,
+                            QDict *opts,
                            Error **errp)
 {
    Visitor *v;
@@ -206,7 +168,7 @@ block_crypto_open_opts_init(QCryptoBlockFormat format,
    ret = g_new0(QCryptoBlockOpenOptions, 1);
    ret->format = format;

-    v = opts_visitor_new(opts);
+    v = qobject_input_visitor_new_keyval(QOBJECT(opts));

    visit_start_struct(v, NULL, NULL, 0, &local_err);
    if (local_err) {
@@ -219,6 +181,11 @@ block_crypto_open_opts_init(QCryptoBlockFormat format,
            v, &ret->u.luks, &local_err);
        break;

+    case Q_CRYPTO_BLOCK_FORMAT_QCOW:
+        visit_type_QCryptoBlockOptionsQCow_members(
+            v, &ret->u.qcow, &local_err);
+        break;
+
    default:
        error_setg(&local_err, "Unsupported block format %d", format);
        break;
@@ -240,9 +207,9 @@ block_crypto_open_opts_init(QCryptoBlockFormat format,
 }


-static QCryptoBlockCreateOptions *
+QCryptoBlockCreateOptions *
 block_crypto_create_opts_init(QCryptoBlockFormat format,
-                              QemuOpts *opts,
+                              QDict *opts,
                              Error **errp)
 {
    Visitor *v;
@@ -252,7 +219,7 @@ block_crypto_create_opts_init(QCryptoBlockFormat format,
    ret = g_new0(QCryptoBlockCreateOptions, 1);
    ret->format = format;

-    v = opts_visitor_new(opts);
+    v = qobject_input_visitor_new_keyval(QOBJECT(opts));

    visit_start_struct(v, NULL, NULL, 0, &local_err);
    if (local_err) {
@@ -265,6 +232,11 @@ block_crypto_create_opts_init(QCryptoBlockFormat format,
            v, &ret->u.luks, &local_err);
        break;

+    case Q_CRYPTO_BLOCK_FORMAT_QCOW:
+        visit_type_QCryptoBlockOptionsQCow_members(
+            v, &ret->u.qcow, &local_err);
+        break;
+
    default:
        error_setg(&local_err, "Unsupported block format %d", format);
        break;
@@ -299,6 +271,7 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
    int ret = -EINVAL;
    QCryptoBlockOpenOptions *open_opts = NULL;
    unsigned int cflags = 0;
+    QDict *cryptoopts = NULL;

    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
                               false, errp);
@@ -313,7 +286,9 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
        goto cleanup;
    }

-    open_opts = block_crypto_open_opts_init(format, opts, errp);
+    cryptoopts = qemu_opts_to_qdict(opts, NULL);
+
+    open_opts = block_crypto_open_opts_init(format, cryptoopts, errp);
    if (!open_opts) {
        goto cleanup;
    }
@@ -321,7 +296,7 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
    if (flags & BDRV_O_NO_IO) {
        cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
    }
-    crypto->block = qcrypto_block_open(open_opts,
+    crypto->block = qcrypto_block_open(open_opts, NULL,
                                       block_crypto_read_func,
                                       bs,
                                       cflags,
@@ -333,10 +308,10 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
    }

    bs->encrypted = true;
-    bs->valid_key = true;

    ret = 0;
 cleanup:
+    QDECREF(cryptoopts);
    qapi_free_QCryptoBlockOpenOptions(open_opts);
    return ret;
 }
@@ -356,13 +331,16 @@ static int block_crypto_create_generic(QCryptoBlockFormat format,
        .opts = opts,
        .filename = filename,
    };
+    QDict *cryptoopts;

-    create_opts = block_crypto_create_opts_init(format, opts, errp);
+    cryptoopts = qemu_opts_to_qdict(opts, NULL);
+
+    create_opts = block_crypto_create_opts_init(format, cryptoopts, errp);
    if (!create_opts) {
        return -1;
    }

-    crypto = qcrypto_block_create(create_opts,
+    crypto = qcrypto_block_create(create_opts, NULL,
                                  block_crypto_init_func,
                                  block_crypto_write_func,
                                  &data,
@@ -375,6 +353,7 @@ static int block_crypto_create_generic(QCryptoBlockFormat format,

    ret = 0;
 cleanup:
+    QDECREF(cryptoopts);
    qcrypto_block_free(crypto);
    blk_unref(data.blk);
    qapi_free_QCryptoBlockCreateOptions(create_opts);
@@ -382,7 +361,7 @@ static int block_crypto_create_generic(QCryptoBlockFormat format,
 }

 static int block_crypto_truncate(BlockDriverState *bs, int64_t offset,
-                                 Error **errp)
+                                 PreallocMode prealloc, Error **errp)
 {
    BlockCrypto *crypto = bs->opaque;
    size_t payload_offset =
@@ -390,7 +369,7 @@ static int block_crypto_truncate(BlockDriverState *bs, int64_t offset,

    offset += payload_offset;

-    return bdrv_truncate(bs->file, offset, errp);
+    return bdrv_truncate(bs->file, offset, prealloc, errp);
 }

 static void block_crypto_close(BlockDriverState *bs)
--- a/block/crypto.h
+++ b/block/crypto.h
@@ -0,0 +1,101 @@
+/*
+ * QEMU block full disk encryption
+ *
+ * Copyright (c) 2015-2017 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef BLOCK_CRYPTO_H__
+#define BLOCK_CRYPTO_H__
+
+#define BLOCK_CRYPTO_OPT_DEF_KEY_SECRET(prefix, helpstr)                \
+    {                                                                   \
+        .name = prefix BLOCK_CRYPTO_OPT_QCOW_KEY_SECRET,                \
+        .type = QEMU_OPT_STRING,                                        \
+        .help = helpstr,                                                \
+    }
+
+#define BLOCK_CRYPTO_OPT_QCOW_KEY_SECRET "key-secret"
+
+#define BLOCK_CRYPTO_OPT_DEF_QCOW_KEY_SECRET(prefix)                    \
+    BLOCK_CRYPTO_OPT_DEF_KEY_SECRET(prefix,                             \
+        "ID of the secret that provides the AES encryption key")
+
+#define BLOCK_CRYPTO_OPT_LUKS_KEY_SECRET "key-secret"
+#define BLOCK_CRYPTO_OPT_LUKS_CIPHER_ALG "cipher-alg"
+#define BLOCK_CRYPTO_OPT_LUKS_CIPHER_MODE "cipher-mode"
+#define BLOCK_CRYPTO_OPT_LUKS_IVGEN_ALG "ivgen-alg"
+#define BLOCK_CRYPTO_OPT_LUKS_IVGEN_HASH_ALG "ivgen-hash-alg"
+#define BLOCK_CRYPTO_OPT_LUKS_HASH_ALG "hash-alg"
+#define BLOCK_CRYPTO_OPT_LUKS_ITER_TIME "iter-time"
+
+#define BLOCK_CRYPTO_OPT_DEF_LUKS_KEY_SECRET(prefix)                    \
+    BLOCK_CRYPTO_OPT_DEF_KEY_SECRET(prefix,                             \
+        "ID of the secret that provides the keyslot passphrase")
+
+#define BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_ALG(prefix)       \
+    {                                                      \
+        .name = prefix BLOCK_CRYPTO_OPT_LUKS_CIPHER_ALG,   \
+        .type = QEMU_OPT_STRING,                           \
+        .help = "Name of encryption cipher algorithm",     \
+    }
+
+#define BLOCK_CRYPTO_OPT_DEF_LUKS_CIPHER_MODE(prefix)      \
+    {                                                      \
+        .name = prefix BLOCK_CRYPTO_OPT_LUKS_CIPHER_MODE,  \
+        .type = QEMU_OPT_STRING,                           \
+        .help = "Name of encryption cipher mode",          \
+    }
+
+#define BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_ALG(prefix)     \
+    {                                                   \
+        .name = prefix BLOCK_CRYPTO_OPT_LUKS_IVGEN_ALG, \
+        .type = QEMU_OPT_STRING,                        \
+        .help = "Name of IV generator algorithm",       \
+    }
+
+#define BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_HASH_ALG(prefix)        \
+    {                                                           \
+        .name = prefix BLOCK_CRYPTO_OPT_LUKS_IVGEN_HASH_ALG,    \
+        .type = QEMU_OPT_STRING,                                \
+        .help = "Name of IV generator hash algorithm",          \
+    }
+
+#define BLOCK_CRYPTO_OPT_DEF_LUKS_HASH_ALG(prefix)       \
+    {                                                    \
+        .name = prefix BLOCK_CRYPTO_OPT_LUKS_HASH_ALG,   \
+        .type = QEMU_OPT_STRING,                         \
+        .help = "Name of encryption hash algorithm",     \
+    }
+
+#define BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME(prefix)           \
+    {                                                         \
+        .name = prefix BLOCK_CRYPTO_OPT_LUKS_ITER_TIME,       \
+        .type = QEMU_OPT_NUMBER,                              \
+        .help = "Time to spend in PBKDF in milliseconds",     \
+    }
+
+QCryptoBlockCreateOptions *
+block_crypto_create_opts_init(QCryptoBlockFormat format,
+                              QDict *opts,
+                              Error **errp);
+
+QCryptoBlockOpenOptions *
+block_crypto_open_opts_init(QCryptoBlockFormat format,
+                            QDict *opts,
+                            Error **errp);
+
+#endif /* BLOCK_CRYPTO_H__ */
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -43,8 +43,18 @@ struct BdrvDirtyBitmap {
    BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
    char *name;                 /* Optional non-empty unique ID */
    int64_t size;               /* Size of the bitmap (Number of sectors) */
-    bool disabled;              /* Bitmap is read-only */
+    bool disabled;              /* Bitmap is disabled. It ignores all writes to
+                                   the device */
    int active_iterators;       /* How many iterators are active */
+    bool readonly;              /* Bitmap is read-only. This field also
+                                   prevents the respective image from being
+                                   modified (i.e. blocks writes and discards).
+                                   Such operations must fail and both the image
+                                   and this bitmap must remain unchanged while
+                                   this flag is set. */
+    bool autoload;              /* For persistent bitmaps: bitmap must be
+                                   autoloaded on image opening */
+    bool persistent;            /* bitmap must be saved to owner disk image */
    QLIST_ENTRY(BdrvDirtyBitmap) list;
 };

@@ -93,6 +103,8 @@ void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
    assert(!bdrv_dirty_bitmap_frozen(bitmap));
    g_free(bitmap->name);
    bitmap->name = NULL;
+    bitmap->persistent = false;
+    bitmap->autoload = false;
 }

 /* Called with BQL taken.  */
@@ -289,6 +301,10 @@ BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
    bitmap->name = NULL;
    successor->name = name;
    bitmap->successor = NULL;
+    successor->persistent = bitmap->persistent;
+    bitmap->persistent = false;
+    successor->autoload = bitmap->autoload;
+    bitmap->autoload = false;
    bdrv_release_dirty_bitmap(bs, bitmap);

    return successor;
@@ -340,15 +356,20 @@ void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
    bdrv_dirty_bitmaps_unlock(bs);
 }

+static bool bdrv_dirty_bitmap_has_name(BdrvDirtyBitmap *bitmap)
+{
+    return !!bdrv_dirty_bitmap_name(bitmap);
+}
+
 /* Called with BQL taken.  */
-static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs,
-                                                  BdrvDirtyBitmap *bitmap,
-                                                  bool only_named)
+static void bdrv_do_release_matching_dirty_bitmap(
+    BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+    bool (*cond)(BdrvDirtyBitmap *bitmap))
 {
    BdrvDirtyBitmap *bm, *next;
    bdrv_dirty_bitmaps_lock(bs);
    QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
-        if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) {
+        if ((!bitmap || bm == bitmap) && (!cond || cond(bm))) {
            assert(!bm->active_iterators);
            assert(!bdrv_dirty_bitmap_frozen(bm));
            assert(!bm->meta);
@@ -373,17 +394,47 @@ out:
 /* Called with BQL taken.  */
 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
 {
-    bdrv_do_release_matching_dirty_bitmap(bs, bitmap, false);
+    bdrv_do_release_matching_dirty_bitmap(bs, bitmap, NULL);
 }

 /**
 * Release all named dirty bitmaps attached to a BDS (for use in bdrv_close()).
 * There must not be any frozen bitmaps attached.
+ * This function does not remove persistent bitmaps from the storage.
 * Called with BQL taken.
 */
 void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs)
 {
-    bdrv_do_release_matching_dirty_bitmap(bs, NULL, true);
+    bdrv_do_release_matching_dirty_bitmap(bs, NULL, bdrv_dirty_bitmap_has_name);
+}
+
+/**
+ * Release all persistent dirty bitmaps attached to a BDS (for use in
+ * bdrv_inactivate_recurse()).
+ * There must not be any frozen bitmaps attached.
+ * This function does not remove persistent bitmaps from the storage.
+ */
+void bdrv_release_persistent_dirty_bitmaps(BlockDriverState *bs)
+{
+    bdrv_do_release_matching_dirty_bitmap(bs, NULL,
+                                          bdrv_dirty_bitmap_get_persistance);
+}
+
+/**
+ * Remove persistent dirty bitmap from the storage if it exists.
+ * Absence of bitmap is not an error, because we have the following scenario:
+ * BdrvDirtyBitmap can have .persistent = true but not yet saved and have no
+ * stored version. For such bitmap bdrv_remove_persistent_dirty_bitmap() should
+ * not fail.
+ * This function doesn't release corresponding BdrvDirtyBitmap.
+ */
+void bdrv_remove_persistent_dirty_bitmap(BlockDriverState *bs,
+                                         const char *name,
+                                         Error **errp)
+{
+    if (bs->drv && bs->drv->bdrv_remove_persistent_dirty_bitmap) {
+        bs->drv->bdrv_remove_persistent_dirty_bitmap(bs, name, errp);
+    }
 }

 /* Called with BQL taken.  */
@@ -455,7 +506,7 @@ uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
    return granularity;
 }

-uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
+uint32_t bdrv_dirty_bitmap_granularity(const BdrvDirtyBitmap *bitmap)
 {
    return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
 }
@@ -504,6 +555,7 @@ void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
                                  int64_t cur_sector, int64_t nr_sectors)
 {
    assert(bdrv_dirty_bitmap_enabled(bitmap));
+    assert(!bdrv_dirty_bitmap_readonly(bitmap));
    hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
 }

@@ -520,6 +572,7 @@ void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
                                    int64_t cur_sector, int64_t nr_sectors)
 {
    assert(bdrv_dirty_bitmap_enabled(bitmap));
+    assert(!bdrv_dirty_bitmap_readonly(bitmap));
    hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
 }

@@ -534,6 +587,7 @@ void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
 {
    assert(bdrv_dirty_bitmap_enabled(bitmap));
+    assert(!bdrv_dirty_bitmap_readonly(bitmap));
    bdrv_dirty_bitmap_lock(bitmap);
    if (!out) {
        hbitmap_reset_all(bitmap->bitmap);
@@ -550,6 +604,7 @@ void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
 {
    HBitmap *tmp = bitmap->bitmap;
    assert(bdrv_dirty_bitmap_enabled(bitmap));
+    assert(!bdrv_dirty_bitmap_readonly(bitmap));
    bitmap->bitmap = in;
    hbitmap_free(tmp);
 }
@@ -586,6 +641,13 @@ void bdrv_dirty_bitmap_deserialize_zeroes(BdrvDirtyBitmap *bitmap,
    hbitmap_deserialize_zeroes(bitmap->bitmap, start, count, finish);
 }

+void bdrv_dirty_bitmap_deserialize_ones(BdrvDirtyBitmap *bitmap,
+                                        uint64_t start, uint64_t count,
+                                        bool finish)
+{
+    hbitmap_deserialize_ones(bitmap->bitmap, start, count, finish);
+}
+
 void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap)
 {
    hbitmap_deserialize_finish(bitmap->bitmap);
@@ -605,6 +667,7 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
        if (!bdrv_dirty_bitmap_enabled(bitmap)) {
            continue;
        }
+        assert(!bdrv_dirty_bitmap_readonly(bitmap));
        hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
    }
    bdrv_dirty_bitmaps_unlock(bs);
@@ -627,3 +690,78 @@ int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap)
 {
    return hbitmap_count(bitmap->meta);
 }
+
+bool bdrv_dirty_bitmap_readonly(const BdrvDirtyBitmap *bitmap)
+{
+    return bitmap->readonly;
+}
+
+/* Called with BQL taken. */
+void bdrv_dirty_bitmap_set_readonly(BdrvDirtyBitmap *bitmap, bool value)
+{
+    qemu_mutex_lock(bitmap->mutex);
+    bitmap->readonly = value;
+    qemu_mutex_unlock(bitmap->mutex);
+}
+
+bool bdrv_has_readonly_bitmaps(BlockDriverState *bs)
+{
+    BdrvDirtyBitmap *bm;
+    QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
+        if (bm->readonly) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+/* Called with BQL taken. */
+void bdrv_dirty_bitmap_set_autoload(BdrvDirtyBitmap *bitmap, bool autoload)
+{
+    qemu_mutex_lock(bitmap->mutex);
+    bitmap->autoload = autoload;
+    qemu_mutex_unlock(bitmap->mutex);
+}
+
+bool bdrv_dirty_bitmap_get_autoload(const BdrvDirtyBitmap *bitmap)
+{
+    return bitmap->autoload;
+}
+
+/* Called with BQL taken. */
+void bdrv_dirty_bitmap_set_persistance(BdrvDirtyBitmap *bitmap, bool persistent)
+{
+    qemu_mutex_lock(bitmap->mutex);
+    bitmap->persistent = persistent;
+    qemu_mutex_unlock(bitmap->mutex);
+}
+
+bool bdrv_dirty_bitmap_get_persistance(BdrvDirtyBitmap *bitmap)
+{
+    return bitmap->persistent;
+}
+
+bool bdrv_has_changed_persistent_bitmaps(BlockDriverState *bs)
+{
+    BdrvDirtyBitmap *bm;
+    QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
+        if (bm->persistent && !bm->readonly) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+BdrvDirtyBitmap *bdrv_dirty_bitmap_next(BlockDriverState *bs,
+                                        BdrvDirtyBitmap *bitmap)
+{
+    return bitmap == NULL ? QLIST_FIRST(&bs->dirty_bitmaps) :
+                            QLIST_NEXT(bitmap, list);
+}
+
+char *bdrv_dirty_bitmap_sha256(const BdrvDirtyBitmap *bitmap, Error **errp)
+{
+    return hbitmap_sha256(bitmap->bitmap, errp);
+}
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -1485,7 +1485,7 @@ static int aio_worker(void *arg)

 static int paio_submit_co(BlockDriverState *bs, int fd,
                          int64_t offset, QEMUIOVector *qiov,
-                          int count, int type)
+                          int bytes, int type)
 {
    RawPosixAIOData *acb = g_new(RawPosixAIOData, 1);
    ThreadPool *pool;
@@ -1494,22 +1494,22 @@ static int paio_submit_co(BlockDriverState *bs, int fd,
    acb->aio_type = type;
    acb->aio_fildes = fd;

-    acb->aio_nbytes = count;
+    acb->aio_nbytes = bytes;
    acb->aio_offset = offset;

    if (qiov) {
        acb->aio_iov = qiov->iov;
        acb->aio_niov = qiov->niov;
-        assert(qiov->size == count);
+        assert(qiov->size == bytes);
    }

-    trace_paio_submit_co(offset, count, type);
+    trace_paio_submit_co(offset, bytes, type);
    pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
    return thread_pool_submit_co(pool, aio_worker, acb);
 }

 static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
-        int64_t offset, QEMUIOVector *qiov, int count,
+        int64_t offset, QEMUIOVector *qiov, int bytes,
        BlockCompletionFunc *cb, void *opaque, int type)
 {
    RawPosixAIOData *acb = g_new(RawPosixAIOData, 1);
@@ -1519,7 +1519,7 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
    acb->aio_type = type;
    acb->aio_fildes = fd;

-    acb->aio_nbytes = count;
+    acb->aio_nbytes = bytes;
    acb->aio_offset = offset;

    if (qiov) {
@@ -1528,7 +1528,7 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
        assert(qiov->size == acb->aio_nbytes);
    }

-    trace_paio_submit(acb, opaque, offset, count, type);
+    trace_paio_submit(acb, opaque, offset, bytes, type);
    pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
    return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
 }
@@ -1624,7 +1624,122 @@ static void raw_close(BlockDriverState *bs)
    }
 }

-static int raw_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
+/**
+ * Truncates the given regular file @fd to @offset and, when growing, fills the
+ * new space according to @prealloc.
+ *
+ * Returns: 0 on success, -errno on failure.
+ */
+static int raw_regular_truncate(int fd, int64_t offset, PreallocMode prealloc,
+                                Error **errp)
+{
+    int result = 0;
+    int64_t current_length = 0;
+    char *buf = NULL;
+    struct stat st;
+
+    if (fstat(fd, &st) < 0) {
+        result = -errno;
+        error_setg_errno(errp, -result, "Could not stat file");
+        return result;
+    }
+
+    current_length = st.st_size;
+    if (current_length > offset && prealloc != PREALLOC_MODE_OFF) {
+        error_setg(errp, "Cannot use preallocation for shrinking files");
+        return -ENOTSUP;
+    }
+
+    switch (prealloc) {
+#ifdef CONFIG_POSIX_FALLOCATE
+    case PREALLOC_MODE_FALLOC:
+        /*
+         * Truncating before posix_fallocate() makes it about twice slower on
+         * file systems that do not support fallocate(), trying to check if a
+         * block is allocated before allocating it, so don't do that here.
+         */
+        result = -posix_fallocate(fd, current_length, offset - current_length);
+        if (result != 0) {
+            /* posix_fallocate() doesn't set errno. */
+            error_setg_errno(errp, -result,
+                             "Could not preallocate new data");
+        }
+        goto out;
+#endif
+    case PREALLOC_MODE_FULL:
+    {
+        int64_t num = 0, left = offset - current_length;
+
+        /*
+         * Knowing the final size from the beginning could allow the file
+         * system driver to do less allocations and possibly avoid
+         * fragmentation of the file.
+         */
+        if (ftruncate(fd, offset) != 0) {
+            result = -errno;
+            error_setg_errno(errp, -result, "Could not resize file");
+            goto out;
+        }
+
+        buf = g_malloc0(65536);
+
+        result = lseek(fd, current_length, SEEK_SET);
+        if (result < 0) {
+            result = -errno;
+            error_setg_errno(errp, -result,
+                             "Failed to seek to the old end of file");
+            goto out;
+        }
+
+        while (left > 0) {
+            num = MIN(left, 65536);
+            result = write(fd, buf, num);
+            if (result < 0) {
+                result = -errno;
+                error_setg_errno(errp, -result,
+                                 "Could not write zeros for preallocation");
+                goto out;
+            }
+            left -= result;
+        }
+        if (result >= 0) {
+            result = fsync(fd);
+            if (result < 0) {
+                result = -errno;
+                error_setg_errno(errp, -result,
+                                 "Could not flush file to disk");
+                goto out;
+            }
+        }
+        goto out;
+    }
+    case PREALLOC_MODE_OFF:
+        if (ftruncate(fd, offset) != 0) {
+            result = -errno;
+            error_setg_errno(errp, -result, "Could not resize file");
+        }
+        return result;
+    default:
+        result = -ENOTSUP;
+        error_setg(errp, "Unsupported preallocation mode: %s",
+                   PreallocMode_lookup[prealloc]);
+        return result;
+    }
+
+out:
+    if (result < 0) {
+        if (ftruncate(fd, current_length) < 0) {
+            error_report("Failed to restore old file length: %s",
+                         strerror(errno));
+        }
+    }
+
+    g_free(buf);
+    return result;
+}
+
+static int raw_truncate(BlockDriverState *bs, int64_t offset,
+                        PreallocMode prealloc, Error **errp)
 {
    BDRVRawState *s = bs->opaque;
    struct stat st;
@@ -1637,12 +1752,16 @@ static int raw_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
    }

    if (S_ISREG(st.st_mode)) {
-        if (ftruncate(s->fd, offset) < 0) {
-            ret = -errno;
-            error_setg_errno(errp, -ret, "Failed to resize the file");
-            return ret;
-        }
-    } else if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
+        return raw_regular_truncate(s->fd, offset, prealloc, errp);
+    }
+
+    if (prealloc != PREALLOC_MODE_OFF) {
+        error_setg(errp, "Preallocation mode '%s' unsupported for this "
+                   "non-regular file", PreallocMode_lookup[prealloc]);
+        return -ENOTSUP;
+    }
+
+    if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
        if (offset > raw_getlength(bs)) {
            error_setg(errp, "Cannot grow device files");
            return -EINVAL;
@@ -1885,71 +2004,9 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
 #endif
    }

-    switch (prealloc) {
-#ifdef CONFIG_POSIX_FALLOCATE
-    case PREALLOC_MODE_FALLOC:
-        /*
-         * Truncating before posix_fallocate() makes it about twice slower on
-         * file systems that do not support fallocate(), trying to check if a
-         * block is allocated before allocating it, so don't do that here.
-         */
-        result = -posix_fallocate(fd, 0, total_size);
-        if (result != 0) {
-            /* posix_fallocate() doesn't set errno. */
-            error_setg_errno(errp, -result,
-                             "Could not preallocate data for the new file");
-        }
-        break;
-#endif
-    case PREALLOC_MODE_FULL:
-    {
-        /*
-         * Knowing the final size from the beginning could allow the file
-         * system driver to do less allocations and possibly avoid
-         * fragmentation of the file.
-         */
-        if (ftruncate(fd, total_size) != 0) {
-            result = -errno;
-            error_setg_errno(errp, -result, "Could not resize file");
-            goto out_close;
-        }
-
-        int64_t num = 0, left = total_size;
-        buf = g_malloc0(65536);
-
-        while (left > 0) {
-            num = MIN(left, 65536);
-            result = write(fd, buf, num);
-            if (result < 0) {
-                result = -errno;
-                error_setg_errno(errp, -result,
-                                 "Could not write to the new file");
-                break;
-            }
-            left -= result;
-        }
-        if (result >= 0) {
-            result = fsync(fd);
-            if (result < 0) {
-                result = -errno;
-                error_setg_errno(errp, -result,
-                                 "Could not flush new file to disk");
-            }
-        }
-        g_free(buf);
-        break;
-    }
-    case PREALLOC_MODE_OFF:
-        if (ftruncate(fd, total_size) != 0) {
-            result = -errno;
-            error_setg_errno(errp, -result, "Could not resize file");
-        }
-        break;
-    default:
-        result = -EINVAL;
-        error_setg(errp, "Unsupported preallocation mode: %s",
-                   PreallocMode_lookup[prealloc]);
-        break;
+    result = raw_regular_truncate(fd, total_size, prealloc, errp);
+    if (result < 0) {
+        goto out_close;
    }

 out_close:
@@ -2109,26 +2166,26 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
 }

 static coroutine_fn BlockAIOCB *raw_aio_pdiscard(BlockDriverState *bs,
-    int64_t offset, int count,
+    int64_t offset, int bytes,
    BlockCompletionFunc *cb, void *opaque)
 {
    BDRVRawState *s = bs->opaque;

-    return paio_submit(bs, s->fd, offset, NULL, count,
+    return paio_submit(bs, s->fd, offset, NULL, bytes,
                       cb, opaque, QEMU_AIO_DISCARD);
 }

 static int coroutine_fn raw_co_pwrite_zeroes(
    BlockDriverState *bs, int64_t offset,
-    int count, BdrvRequestFlags flags)
+    int bytes, BdrvRequestFlags flags)
 {
    BDRVRawState *s = bs->opaque;

    if (!(flags & BDRV_REQ_MAY_UNMAP)) {
-        return paio_submit_co(bs, s->fd, offset, NULL, count,
+        return paio_submit_co(bs, s->fd, offset, NULL, bytes,
                              QEMU_AIO_WRITE_ZEROES);
    } else if (s->discard_zeroes) {
-        return paio_submit_co(bs, s->fd, offset, NULL, count,
+        return paio_submit_co(bs, s->fd, offset, NULL, bytes,
                              QEMU_AIO_DISCARD);
    }
    return -ENOTSUP;
@@ -2560,7 +2617,7 @@ static int fd_open(BlockDriverState *bs)
 }

 static coroutine_fn BlockAIOCB *hdev_aio_pdiscard(BlockDriverState *bs,
-    int64_t offset, int count,
+    int64_t offset, int bytes,
    BlockCompletionFunc *cb, void *opaque)
 {
    BDRVRawState *s = bs->opaque;
@@ -2568,12 +2625,12 @@ static coroutine_fn BlockAIOCB *hdev_aio_pdiscard(BlockDriverState *bs,
    if (fd_open(bs) < 0) {
        return NULL;
    }
-    return paio_submit(bs, s->fd, offset, NULL, count,
+    return paio_submit(bs, s->fd, offset, NULL, bytes,
                       cb, opaque, QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
 }

 static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs,
-    int64_t offset, int count, BdrvRequestFlags flags)
+    int64_t offset, int bytes, BdrvRequestFlags flags)
 {
    BDRVRawState *s = bs->opaque;
    int rc;
@@ -2583,10 +2640,10 @@ static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs,
        return rc;
    }
    if (!(flags & BDRV_REQ_MAY_UNMAP)) {
-        return paio_submit_co(bs, s->fd, offset, NULL, count,
+        return paio_submit_co(bs, s->fd, offset, NULL, bytes,
                              QEMU_AIO_WRITE_ZEROES|QEMU_AIO_BLKDEV);
    } else if (s->discard_zeroes) {
-        return paio_submit_co(bs, s->fd, offset, NULL, count,
+        return paio_submit_co(bs, s->fd, offset, NULL, bytes,
                              QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
    }
    return -ENOTSUP;
--- a/block/file-win32.c
+++ b/block/file-win32.c
@@ -461,12 +461,19 @@ static void raw_close(BlockDriverState *bs)
    }
 }

-static int raw_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
+static int raw_truncate(BlockDriverState *bs, int64_t offset,
+                        PreallocMode prealloc, Error **errp)
 {
    BDRVRawState *s = bs->opaque;
    LONG low, high;
    DWORD dwPtrLow;

+    if (prealloc != PREALLOC_MODE_OFF) {
+        error_setg(errp, "Unsupported preallocation mode '%s'",
+                   PreallocMode_lookup[prealloc]);
+        return -ENOTSUP;
+    }
+
    low = offset;
    high = offset >> 32;

--- a/block/gluster.c
+++ b/block/gluster.c
@@ -345,8 +345,7 @@ static int qemu_gluster_parse_uri(BlockdevOptionsGluster *gconf,
        is_unix = true;
    } else if (!strcmp(uri->scheme, "gluster+rdma")) {
        gsconf->type = SOCKET_ADDRESS_TYPE_INET;
-        error_report("Warning: rdma feature is not supported, falling "
-                     "back to tcp");
+        warn_report("rdma feature is not supported, falling back to tcp");
    } else {
        ret = -EINVAL;
        goto out;
@@ -1096,11 +1095,17 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
 }

 static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset,
-                                 Error **errp)
+                                 PreallocMode prealloc, Error **errp)
 {
    int ret;
    BDRVGlusterState *s = bs->opaque;

+    if (prealloc != PREALLOC_MODE_OFF) {
+        error_setg(errp, "Unsupported preallocation mode '%s'",
+                   PreallocMode_lookup[prealloc]);
+        return -ENOTSUP;
+    }
+
    ret = glfs_ftruncate(s->fd, offset);
    if (ret < 0) {
        ret = -errno;
--- a/block/io.c
+++ b/block/io.c
@@ -34,16 +34,8 @@

 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

-static BlockAIOCB *bdrv_co_aio_prw_vector(BdrvChild *child,
-                                          int64_t offset,
-                                          QEMUIOVector *qiov,
-                                          BdrvRequestFlags flags,
-                                          BlockCompletionFunc *cb,
-                                          void *opaque,
-                                          bool is_write);
-static void coroutine_fn bdrv_co_do_rw(void *opaque);
 static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
-    int64_t offset, int count, BdrvRequestFlags flags);
+    int64_t offset, int bytes, BdrvRequestFlags flags);

 void bdrv_parent_drained_begin(BlockDriverState *bs)
 {
@@ -157,6 +149,37 @@ bool bdrv_requests_pending(BlockDriverState *bs)
    return false;
 }

+typedef struct {
+    Coroutine *co;
+    BlockDriverState *bs;
+    bool done;
+} BdrvCoDrainData;
+
+static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
+{
+    BdrvCoDrainData *data = opaque;
+    BlockDriverState *bs = data->bs;
+
+    bs->drv->bdrv_co_drain(bs);
+
+    /* Set data->done before reading bs->wakeup.  */
+    atomic_mb_set(&data->done, true);
+    bdrv_wakeup(bs);
+}
+
+static void bdrv_drain_invoke(BlockDriverState *bs)
+{
+    BdrvCoDrainData data = { .bs = bs, .done = false };
+
+    if (!bs->drv || !bs->drv->bdrv_co_drain) {
+        return;
+    }
+
+    data.co = qemu_coroutine_create(bdrv_drain_invoke_entry, &data);
+    bdrv_coroutine_enter(bs, data.co);
+    BDRV_POLL_WHILE(bs, !data.done);
+}
+
 static bool bdrv_drain_recurse(BlockDriverState *bs)
 {
    BdrvChild *child, *tmp;
@@ -164,9 +187,8 @@ static bool bdrv_drain_recurse(BlockDriverState *bs)

    waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0);

-    if (bs->drv && bs->drv->bdrv_drain) {
-        bs->drv->bdrv_drain(bs);
-    }
+    /* Ensure any pending metadata writes are submitted to bs->file.  */
+    bdrv_drain_invoke(bs);

    QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
        BlockDriverState *bs = child->bs;
@@ -192,12 +214,6 @@ static bool bdrv_drain_recurse(BlockDriverState *bs)
    return waited;
 }

-typedef struct {
-    Coroutine *co;
-    BlockDriverState *bs;
-    bool done;
-} BdrvCoDrainData;
-
 static void bdrv_co_drain_bh_cb(void *opaque)
 {
    BdrvCoDrainData *data = opaque;
@@ -426,27 +442,6 @@ static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
    req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
 }

-/**
- * Round a region to cluster boundaries (sector-based)
- */
-void bdrv_round_sectors_to_clusters(BlockDriverState *bs,
-                                    int64_t sector_num, int nb_sectors,
-                                    int64_t *cluster_sector_num,
-                                    int *cluster_nb_sectors)
-{
-    BlockDriverInfo bdi;
-
-    if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
-        *cluster_sector_num = sector_num;
-        *cluster_nb_sectors = nb_sectors;
-    } else {
-        int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
-        *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
-        *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
-                                            nb_sectors, c);
-    }
-}
-
 /**
 * Round a region to cluster boundaries
 */
@@ -674,12 +669,12 @@ int bdrv_write(BdrvChild *child, int64_t sector_num,
 }

 int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
-                       int count, BdrvRequestFlags flags)
+                       int bytes, BdrvRequestFlags flags)
 {
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base = NULL,
-        .iov_len = count,
+        .iov_len = bytes,
    };

    qemu_iovec_init_external(&qiov, &iov, 1);
@@ -1062,17 +1057,18 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
    }

    if (flags & BDRV_REQ_COPY_ON_READ) {
-        int64_t start_sector = offset >> BDRV_SECTOR_BITS;
-        int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
-        unsigned int nb_sectors = end_sector - start_sector;
-        int pnum;
+        /* TODO: Simplify further once bdrv_is_allocated no longer
+         * requires sector alignment */
+        int64_t start = QEMU_ALIGN_DOWN(offset, BDRV_SECTOR_SIZE);
+        int64_t end = QEMU_ALIGN_UP(offset + bytes, BDRV_SECTOR_SIZE);
+        int64_t pnum;

-        ret = bdrv_is_allocated(bs, start_sector, nb_sectors, &pnum);
+        ret = bdrv_is_allocated(bs, start, end - start, &pnum);
        if (ret < 0) {
            goto out;
        }

-        if (!ret || pnum != nb_sectors) {
+        if (!ret || pnum != end - start) {
            ret = bdrv_co_do_copy_on_readv(child, offset, bytes, qiov);
            goto out;
        }
@@ -1220,7 +1216,7 @@ int coroutine_fn bdrv_co_readv(BdrvChild *child, int64_t sector_num,
 #define MAX_WRITE_ZEROES_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)

 static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
-    int64_t offset, int count, BdrvRequestFlags flags)
+    int64_t offset, int bytes, BdrvRequestFlags flags)
 {
    BlockDriver *drv = bs->drv;
    QEMUIOVector qiov;
@@ -1238,12 +1234,12 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,

    assert(alignment % bs->bl.request_alignment == 0);
    head = offset % alignment;
-    tail = (offset + count) % alignment;
+    tail = (offset + bytes) % alignment;
    max_write_zeroes = QEMU_ALIGN_DOWN(max_write_zeroes, alignment);
    assert(max_write_zeroes >= bs->bl.request_alignment);

-    while (count > 0 && !ret) {
-        int num = count;
+    while (bytes > 0 && !ret) {
+        int num = bytes;

        /* Align request.  Block drivers can expect the "bulk" of the request
         * to be aligned, and that unaligned requests do not cross cluster
@@ -1253,7 +1249,7 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
            /* Make a small request up to the first aligned sector. For
             * convenience, limit this request to max_transfer even if
             * we don't need to fall back to writes.  */
-            num = MIN(MIN(count, max_transfer), alignment - head);
+            num = MIN(MIN(bytes, max_transfer), alignment - head);
            head = (head + num) % alignment;
            assert(num < max_write_zeroes);
        } else if (tail && num > alignment) {
@@ -1314,7 +1310,7 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
        }

        offset += num;
-        count -= num;
+        bytes -= num;
    }

 fail:
@@ -1343,6 +1339,10 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
    uint64_t bytes_remaining = bytes;
    int max_transfer;

+    if (bdrv_has_readonly_bitmaps(bs)) {
+        return -EPERM;
+    }
+
    assert(is_power_of_2(align));
    assert((offset & (align - 1)) == 0);
    assert((bytes & (align - 1)) == 0);
@@ -1666,15 +1666,15 @@ int coroutine_fn bdrv_co_writev(BdrvChild *child, int64_t sector_num,
 }

 int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
-                                       int count, BdrvRequestFlags flags)
+                                       int bytes, BdrvRequestFlags flags)
 {
-    trace_bdrv_co_pwrite_zeroes(child->bs, offset, count, flags);
+    trace_bdrv_co_pwrite_zeroes(child->bs, offset, bytes, flags);

    if (!(child->bs->open_flags & BDRV_O_UNMAP)) {
        flags &= ~BDRV_REQ_MAY_UNMAP;
    }

-    return bdrv_co_pwritev(child, offset, count, NULL,
+    return bdrv_co_pwritev(child, offset, bytes, NULL,
                           BDRV_REQ_ZERO_WRITE | flags);
 }

@@ -1719,15 +1719,16 @@ typedef struct BdrvCoGetBlockStatusData {
 * Drivers not implementing the functionality are assumed to not support
 * backing files, hence all their sectors are reported as allocated.
 *
- * If 'sector_num' is beyond the end of the disk image the return value is 0
- * and 'pnum' is set to 0.
+ * If 'sector_num' is beyond the end of the disk image the return value is
+ * BDRV_BLOCK_EOF and 'pnum' is set to 0.
 *
 * 'pnum' is set to the number of sectors (including and immediately following
 * the specified sector) that are known to be in the same
 * allocated/unallocated state.
 *
 * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
- * beyond the end of the disk image it will be clamped.
+ * beyond the end of the disk image it will be clamped; if 'pnum' is set to
+ * the end of the image, then the returned value will include BDRV_BLOCK_EOF.
 *
 * If returned value is positive and BDRV_BLOCK_OFFSET_VALID bit is set, 'file'
 * points to the BDS which the sector range is allocated in.
@@ -1741,6 +1742,7 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
    int64_t n;
    int64_t ret, ret2;

+    *file = NULL;
    total_sectors = bdrv_nb_sectors(bs);
    if (total_sectors < 0) {
        return total_sectors;
@@ -1748,7 +1750,7 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,

    if (sector_num >= total_sectors) {
        *pnum = 0;
-        return 0;
+        return BDRV_BLOCK_EOF;
    }

    n = total_sectors - sector_num;
@@ -1759,13 +1761,16 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
    if (!bs->drv->bdrv_co_get_block_status) {
        *pnum = nb_sectors;
        ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
+        if (sector_num + nb_sectors == total_sectors) {
+            ret |= BDRV_BLOCK_EOF;
+        }
        if (bs->drv->protocol_name) {
            ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
+            *file = bs;
        }
        return ret;
    }

-    *file = NULL;
    bdrv_inc_in_flight(bs);
    ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum,
                                            file);
@@ -1775,7 +1780,7 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
    }

    if (ret & BDRV_BLOCK_RAW) {
-        assert(ret & BDRV_BLOCK_OFFSET_VALID);
+        assert(ret & BDRV_BLOCK_OFFSET_VALID && *file);
        ret = bdrv_co_get_block_status(*file, ret >> BDRV_SECTOR_BITS,
                                       *pnum, pnum, file);
        goto out;
@@ -1807,10 +1812,13 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
            /* Ignore errors.  This is just providing extra information, it
             * is useful but not necessary.
             */
-            if (!file_pnum) {
-                /* !file_pnum indicates an offset at or beyond the EOF; it is
-                 * perfectly valid for the format block driver to point to such
-                 * offsets, so catch it and mark everything as zero */
+            if (ret2 & BDRV_BLOCK_EOF &&
+                (!file_pnum || ret2 & BDRV_BLOCK_ZERO)) {
+                /*
+                 * It is valid for the format block driver to read
+                 * beyond the end of the underlying file's current
+                 * size; such areas read as zero.
+                 */
                ret |= BDRV_BLOCK_ZERO;
            } else {
                /* Limit request to the range reported by the protocol driver */
@@ -1822,6 +1830,9 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,

 out:
    bdrv_dec_in_flight(bs);
+    if (ret >= 0 && sector_num + *pnum == total_sectors) {
+        ret |= BDRV_BLOCK_EOF;
+    }
    return ret;
 }

@@ -1834,16 +1845,30 @@ static int64_t coroutine_fn bdrv_co_get_block_status_above(BlockDriverState *bs,
 {
    BlockDriverState *p;
    int64_t ret = 0;
+    bool first = true;

    assert(bs != base);
    for (p = bs; p != base; p = backing_bs(p)) {
        ret = bdrv_co_get_block_status(p, sector_num, nb_sectors, pnum, file);
-        if (ret < 0 || ret & BDRV_BLOCK_ALLOCATED) {
+        if (ret < 0) {
+            break;
+        }
+        if (ret & BDRV_BLOCK_ZERO && ret & BDRV_BLOCK_EOF && !first) {
+            /*
+             * Reading beyond the end of the file continues to read
+             * zeroes, but we can only widen the result to the
+             * unallocated length we learned from an earlier
+             * iteration.
+             */
+            *pnum = nb_sectors;
+        }
+        if (ret & (BDRV_BLOCK_ZERO | BDRV_BLOCK_DATA)) {
            break;
        }
        /* [sector_num, pnum] unallocated on this layer, which could be only
         * the first part of [sector_num, nb_sectors].  */
        nb_sectors = MIN(nb_sectors, *pnum);
+        first = false;
    }
    return ret;
 }
@@ -1904,59 +1929,72 @@ int64_t bdrv_get_block_status(BlockDriverState *bs,
                                       sector_num, nb_sectors, pnum, file);
 }

-int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
-                                   int nb_sectors, int *pnum)
+int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
+                                   int64_t bytes, int64_t *pnum)
 {
    BlockDriverState *file;
-    int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum,
-                                        &file);
+    int64_t sector_num = offset >> BDRV_SECTOR_BITS;
+    int nb_sectors = bytes >> BDRV_SECTOR_BITS;
+    int64_t ret;
+    int psectors;
+
+    assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE));
+    assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE) && bytes < INT_MAX);
+    ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &psectors,
+                                &file);
    if (ret < 0) {
        return ret;
    }
+    if (pnum) {
+        *pnum = psectors * BDRV_SECTOR_SIZE;
+    }
    return !!(ret & BDRV_BLOCK_ALLOCATED);
 }

 /*
 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
 *
- * Return true if the given sector is allocated in any image between
- * BASE and TOP (inclusive).  BASE can be NULL to check if the given
- * sector is allocated in any image of the chain.  Return false otherwise.
+ * Return true if (a prefix of) the given range is allocated in any image
+ * between BASE and TOP (inclusive).  BASE can be NULL to check if the given
+ * offset is allocated in any image of the chain.  Return false otherwise,
+ * or negative errno on failure.
 *
- * 'pnum' is set to the number of sectors (including and immediately following
- *  the specified sector) that are known to be in the same
- *  allocated/unallocated state.
+ * 'pnum' is set to the number of bytes (including and immediately
+ * following the specified offset) that are known to be in the same
+ * allocated/unallocated state.  Note that a subsequent call starting
+ * at 'offset + *pnum' may return the same allocation status (in other
+ * words, the result is not necessarily the maximum possible range);
+ * but 'pnum' will only be 0 when end of file is reached.
 *
 */
 int bdrv_is_allocated_above(BlockDriverState *top,
                            BlockDriverState *base,
-                            int64_t sector_num,
-                            int nb_sectors, int *pnum)
+                            int64_t offset, int64_t bytes, int64_t *pnum)
 {
    BlockDriverState *intermediate;
-    int ret, n = nb_sectors;
+    int ret;
+    int64_t n = bytes;

    intermediate = top;
    while (intermediate && intermediate != base) {
-        int pnum_inter;
-        ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors,
-                                &pnum_inter);
+        int64_t pnum_inter;
+        int64_t size_inter;
+
+        ret = bdrv_is_allocated(intermediate, offset, bytes, &pnum_inter);
        if (ret < 0) {
            return ret;
-        } else if (ret) {
+        }
+        if (ret) {
            *pnum = pnum_inter;
            return 1;
        }

-        /*
-         * [sector_num, nb_sectors] is unallocated on top but intermediate
-         * might have
-         *
-         * [sector_num+x, nr_sectors] allocated.
-         */
+        size_inter = bdrv_getlength(intermediate);
+        if (size_inter < 0) {
+            return size_inter;
+        }
        if (n > pnum_inter &&
-            (intermediate == top ||
-             sector_num + pnum_inter < intermediate->total_sectors)) {
+            (intermediate == top || offset + pnum_inter < size_inter)) {
            n = pnum_inter;
        }

@@ -1980,17 +2018,24 @@ bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
                   bool is_read)
 {
    BlockDriver *drv = bs->drv;
+    int ret = -ENOTSUP;
+
+    bdrv_inc_in_flight(bs);

    if (!drv) {
-        return -ENOMEDIUM;
+        ret = -ENOMEDIUM;
    } else if (drv->bdrv_load_vmstate) {
-        return is_read ? drv->bdrv_load_vmstate(bs, qiov, pos)
-                       : drv->bdrv_save_vmstate(bs, qiov, pos);
+        if (is_read) {
+            ret = drv->bdrv_load_vmstate(bs, qiov, pos);
+        } else {
+            ret = drv->bdrv_save_vmstate(bs, qiov, pos);
+        }
    } else if (bs->file) {
-        return bdrv_co_rw_vmstate(bs->file->bs, qiov, pos, is_read);
+        ret = bdrv_co_rw_vmstate(bs->file->bs, qiov, pos, is_read);
    }

-    return -ENOTSUP;
+    bdrv_dec_in_flight(bs);
+    return ret;
 }

 static void coroutine_fn bdrv_co_rw_vmstate_entry(void *opaque)
@@ -2016,9 +2061,7 @@ bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
        Coroutine *co = qemu_coroutine_create(bdrv_co_rw_vmstate_entry, &data);

        bdrv_coroutine_enter(bs, co);
-        while (data.ret == -EINPROGRESS) {
-            aio_poll(bdrv_get_aio_context(bs), true);
-        }
+        BDRV_POLL_WHILE(bs, data.ret == -EINPROGRESS);
        return data.ret;
    }
 }
@@ -2075,28 +2118,6 @@ int bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
 /**************************************************************/
 /* async I/Os */

-BlockAIOCB *bdrv_aio_readv(BdrvChild *child, int64_t sector_num,
-                           QEMUIOVector *qiov, int nb_sectors,
-                           BlockCompletionFunc *cb, void *opaque)
-{
-    trace_bdrv_aio_readv(child->bs, sector_num, nb_sectors, opaque);
-
-    assert(nb_sectors << BDRV_SECTOR_BITS == qiov->size);
-    return bdrv_co_aio_prw_vector(child, sector_num << BDRV_SECTOR_BITS, qiov,
-                                  0, cb, opaque, false);
-}
-
-BlockAIOCB *bdrv_aio_writev(BdrvChild *child, int64_t sector_num,
-                            QEMUIOVector *qiov, int nb_sectors,
-                            BlockCompletionFunc *cb, void *opaque)
-{
-    trace_bdrv_aio_writev(child->bs, sector_num, nb_sectors, opaque);
-
-    assert(nb_sectors << BDRV_SECTOR_BITS == qiov->size);
-    return bdrv_co_aio_prw_vector(child, sector_num << BDRV_SECTOR_BITS, qiov,
-                                  0, cb, opaque, true);
-}
-
 void bdrv_aio_cancel(BlockAIOCB *acb)
 {
    qemu_aio_ref(acb);
@@ -2128,147 +2149,6 @@ void bdrv_aio_cancel_async(BlockAIOCB *acb)
    }
 }

-/**************************************************************/
-/* async block device emulation */
-
-typedef struct BlockRequest {
-    union {
-        /* Used during read, write, trim */
-        struct {
-            int64_t offset;
-            int bytes;
-            int flags;
-            QEMUIOVector *qiov;
-        };
-        /* Used during ioctl */
-        struct {
-            int req;
-            void *buf;
-        };
-    };
-    BlockCompletionFunc *cb;
-    void *opaque;
-
-    int error;
-} BlockRequest;
-
-typedef struct BlockAIOCBCoroutine {
-    BlockAIOCB common;
-    BdrvChild *child;
-    BlockRequest req;
-    bool is_write;
-    bool need_bh;
-    bool *done;
-} BlockAIOCBCoroutine;
-
-static const AIOCBInfo bdrv_em_co_aiocb_info = {
-    .aiocb_size         = sizeof(BlockAIOCBCoroutine),
-};
-
-static void bdrv_co_complete(BlockAIOCBCoroutine *acb)
-{
-    if (!acb->need_bh) {
-        bdrv_dec_in_flight(acb->common.bs);
-        acb->common.cb(acb->common.opaque, acb->req.error);
-        qemu_aio_unref(acb);
-    }
-}
-
-static void bdrv_co_em_bh(void *opaque)
-{
-    BlockAIOCBCoroutine *acb = opaque;
-
-    assert(!acb->need_bh);
-    bdrv_co_complete(acb);
-}
-
-static void bdrv_co_maybe_schedule_bh(BlockAIOCBCoroutine *acb)
-{
-    acb->need_bh = false;
-    if (acb->req.error != -EINPROGRESS) {
-        BlockDriverState *bs = acb->common.bs;
-
-        aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
-    }
-}
-
-/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
-static void coroutine_fn bdrv_co_do_rw(void *opaque)
-{
-    BlockAIOCBCoroutine *acb = opaque;
-
-    if (!acb->is_write) {
-        acb->req.error = bdrv_co_preadv(acb->child, acb->req.offset,
-            acb->req.qiov->size, acb->req.qiov, acb->req.flags);
-    } else {
-        acb->req.error = bdrv_co_pwritev(acb->child, acb->req.offset,
-            acb->req.qiov->size, acb->req.qiov, acb->req.flags);
-    }
-
-    bdrv_co_complete(acb);
-}
-
-static BlockAIOCB *bdrv_co_aio_prw_vector(BdrvChild *child,
-                                          int64_t offset,
-                                          QEMUIOVector *qiov,
-                                          BdrvRequestFlags flags,
-                                          BlockCompletionFunc *cb,
-                                          void *opaque,
-                                          bool is_write)
-{
-    Coroutine *co;
-    BlockAIOCBCoroutine *acb;
-
-    /* Matched by bdrv_co_complete's bdrv_dec_in_flight.  */
-    bdrv_inc_in_flight(child->bs);
-
-    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, child->bs, cb, opaque);
-    acb->child = child;
-    acb->need_bh = true;
-    acb->req.error = -EINPROGRESS;
-    acb->req.offset = offset;
-    acb->req.qiov = qiov;
-    acb->req.flags = flags;
-    acb->is_write = is_write;
-
-    co = qemu_coroutine_create(bdrv_co_do_rw, acb);
-    bdrv_coroutine_enter(child->bs, co);
-
-    bdrv_co_maybe_schedule_bh(acb);
-    return &acb->common;
-}
-
-static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
-{
-    BlockAIOCBCoroutine *acb = opaque;
-    BlockDriverState *bs = acb->common.bs;
-
-    acb->req.error = bdrv_co_flush(bs);
-    bdrv_co_complete(acb);
-}
-
-BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
-        BlockCompletionFunc *cb, void *opaque)
-{
-    trace_bdrv_aio_flush(bs, opaque);
-
-    Coroutine *co;
-    BlockAIOCBCoroutine *acb;
-
-    /* Matched by bdrv_co_complete's bdrv_dec_in_flight.  */
-    bdrv_inc_in_flight(bs);
-
-    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
-    acb->need_bh = true;
-    acb->req.error = -EINPROGRESS;
-
-    co = qemu_coroutine_create(bdrv_aio_flush_co_entry, acb);
-    bdrv_coroutine_enter(bs, co);
-
-    bdrv_co_maybe_schedule_bh(acb);
-    return &acb->common;
-}
-
 /**************************************************************/
 /* Coroutine block device emulation */

@@ -2414,18 +2294,18 @@ int bdrv_flush(BlockDriverState *bs)
 typedef struct DiscardCo {
    BlockDriverState *bs;
    int64_t offset;
-    int count;
+    int bytes;
    int ret;
 } DiscardCo;
 static void coroutine_fn bdrv_pdiscard_co_entry(void *opaque)
 {
    DiscardCo *rwco = opaque;

-    rwco->ret = bdrv_co_pdiscard(rwco->bs, rwco->offset, rwco->count);
+    rwco->ret = bdrv_co_pdiscard(rwco->bs, rwco->offset, rwco->bytes);
 }

 int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset,
-                                  int count)
+                                  int bytes)
 {
    BdrvTrackedRequest req;
    int max_pdiscard, ret;
@@ -2435,7 +2315,11 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset,
        return -ENOMEDIUM;
    }

-    ret = bdrv_check_byte_request(bs, offset, count);
+    if (bdrv_has_readonly_bitmaps(bs)) {
+        return -EPERM;
+    }
+
+    ret = bdrv_check_byte_request(bs, offset, bytes);
    if (ret < 0) {
        return ret;
    } else if (bs->read_only) {
@@ -2460,10 +2344,10 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset,
    align = MAX(bs->bl.pdiscard_alignment, bs->bl.request_alignment);
    assert(align % bs->bl.request_alignment == 0);
    head = offset % align;
-    tail = (offset + count) % align;
+    tail = (offset + bytes) % align;

    bdrv_inc_in_flight(bs);
-    tracked_request_begin(&req, bs, offset, count, BDRV_TRACKED_DISCARD);
+    tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_DISCARD);

    ret = notifier_with_return_list_notify(&bs->before_write_notifiers, &req);
    if (ret < 0) {
@@ -2474,13 +2358,12 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset,
                                   align);
    assert(max_pdiscard >= bs->bl.request_alignment);

-    while (count > 0) {
-        int ret;
-        int num = count;
+    while (bytes > 0) {
+        int num = bytes;

        if (head) {
            /* Make small requests to get to alignment boundaries. */
-            num = MIN(count, align - head);
+            num = MIN(bytes, align - head);
            if (!QEMU_IS_ALIGNED(num, bs->bl.request_alignment)) {
                num %= bs->bl.request_alignment;
            }
@@ -2524,7 +2407,7 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset,
        }

        offset += num;
-        count -= num;
+        bytes -= num;
    }
    ret = 0;
 out:
@@ -2536,13 +2419,13 @@ out:
    return ret;
 }

-int bdrv_pdiscard(BlockDriverState *bs, int64_t offset, int count)
+int bdrv_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
 {
    Coroutine *co;
    DiscardCo rwco = {
        .bs = bs,
        .offset = offset,
-        .count = count,
+        .bytes = bytes,
        .ret = NOT_DONE,
    };

--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -1116,14 +1116,14 @@ iscsi_getlength(BlockDriverState *bs)
 }

 static int
-coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
+coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
 {
    IscsiLun *iscsilun = bs->opaque;
    struct IscsiTask iTask;
    struct unmap_list list;
    int r = 0;

-    if (!is_byte_request_lun_aligned(offset, count, iscsilun)) {
+    if (!is_byte_request_lun_aligned(offset, bytes, iscsilun)) {
        return -ENOTSUP;
    }

@@ -1133,7 +1133,7 @@ coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
    }

    list.lba = offset / iscsilun->block_size;
-    list.num = count / iscsilun->block_size;
+    list.num = bytes / iscsilun->block_size;

    iscsi_co_init_iscsitask(iscsilun, &iTask);
    qemu_mutex_lock(&iscsilun->mutex);
@@ -1174,7 +1174,7 @@ retry:
    }

    iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
-                               count >> BDRV_SECTOR_BITS);
+                               bytes >> BDRV_SECTOR_BITS);

 out_unlock:
    qemu_mutex_unlock(&iscsilun->mutex);
@@ -1183,7 +1183,7 @@ out_unlock:

 static int
 coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
-                                    int count, BdrvRequestFlags flags)
+                                    int bytes, BdrvRequestFlags flags)
 {
    IscsiLun *iscsilun = bs->opaque;
    struct IscsiTask iTask;
@@ -1192,7 +1192,7 @@ coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
    bool use_16_for_ws = iscsilun->use_16_for_rw;
    int r = 0;

-    if (!is_byte_request_lun_aligned(offset, count, iscsilun)) {
+    if (!is_byte_request_lun_aligned(offset, bytes, iscsilun)) {
        return -ENOTSUP;
    }

@@ -1215,7 +1215,7 @@ coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
    }

    lba = offset / iscsilun->block_size;
-    nb_blocks = count / iscsilun->block_size;
+    nb_blocks = bytes / iscsilun->block_size;

    if (iscsilun->zeroblock == NULL) {
        iscsilun->zeroblock = g_try_malloc0(iscsilun->block_size);
@@ -1273,17 +1273,17 @@ retry:

    if (iTask.status != SCSI_STATUS_GOOD) {
        iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
-                                   count >> BDRV_SECTOR_BITS);
+                                   bytes >> BDRV_SECTOR_BITS);
        r = iTask.err_code;
        goto out_unlock;
    }

    if (flags & BDRV_REQ_MAY_UNMAP) {
        iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
-                                   count >> BDRV_SECTOR_BITS);
+                                   bytes >> BDRV_SECTOR_BITS);
    } else {
        iscsi_allocmap_set_allocated(iscsilun, offset >> BDRV_SECTOR_BITS,
-                                     count >> BDRV_SECTOR_BITS);
+                                     bytes >> BDRV_SECTOR_BITS);
    }

 out_unlock:
@@ -1761,9 +1761,9 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
     * filename encoded options */
    filename = qdict_get_try_str(options, "filename");
    if (filename) {
-        error_report("Warning: 'filename' option specified. "
-                      "This is an unsupported option, and may be deprecated "
-                      "in the future");
+        warn_report("'filename' option specified. "
+                    "This is an unsupported option, and may be deprecated "
+                    "in the future");
        iscsi_parse_filename(filename, options, &local_err);
        if (local_err) {
            ret = -EINVAL;
@@ -2079,11 +2079,18 @@ static void iscsi_reopen_commit(BDRVReopenState *reopen_state)
    }
 }

-static int iscsi_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
+static int iscsi_truncate(BlockDriverState *bs, int64_t offset,
+                          PreallocMode prealloc, Error **errp)
 {
    IscsiLun *iscsilun = bs->opaque;
    Error *local_err = NULL;

+    if (prealloc != PREALLOC_MODE_OFF) {
+        error_setg(errp, "Unsupported preallocation mode '%s'",
+                   PreallocMode_lookup[prealloc]);
+        return -ENOTSUP;
+    }
+
    if (iscsilun->type != TYPE_DISK) {
        error_setg(errp, "Cannot resize non-disk iSCSI devices");
        return -ENOTSUP;
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -24,9 +24,8 @@

 #define SLICE_TIME    100000000ULL /* ns */
 #define MAX_IN_FLIGHT 16
-#define MAX_IO_SECTORS ((1 << 20) >> BDRV_SECTOR_BITS) /* 1 Mb */
-#define DEFAULT_MIRROR_BUF_SIZE \
-    (MAX_IN_FLIGHT * MAX_IO_SECTORS * BDRV_SECTOR_SIZE)
+#define MAX_IO_BYTES (1 << 20) /* 1 Mb */
+#define DEFAULT_MIRROR_BUF_SIZE (MAX_IN_FLIGHT * MAX_IO_BYTES)

 /* The mirroring buffer is a list of granularity-sized chunks.
 * Free chunks are organized in a list.
@@ -67,11 +66,11 @@ typedef struct MirrorBlockJob {
    uint64_t last_pause_ns;
    unsigned long *in_flight_bitmap;
    int in_flight;
-    int64_t sectors_in_flight;
+    int64_t bytes_in_flight;
    int ret;
    bool unmap;
    bool waiting_for_io;
-    int target_cluster_sectors;
+    int target_cluster_size;
    int max_iov;
    bool initial_zeroing_ongoing;
 } MirrorBlockJob;
@@ -79,8 +78,8 @@ typedef struct MirrorBlockJob {
 typedef struct MirrorOp {
    MirrorBlockJob *s;
    QEMUIOVector qiov;
-    int64_t sector_num;
-    int nb_sectors;
+    int64_t offset;
+    uint64_t bytes;
 } MirrorOp;

 static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read,
@@ -101,12 +100,12 @@ static void mirror_iteration_done(MirrorOp *op, int ret)
    MirrorBlockJob *s = op->s;
    struct iovec *iov;
    int64_t chunk_num;
-    int i, nb_chunks, sectors_per_chunk;
+    int i, nb_chunks;

-    trace_mirror_iteration_done(s, op->sector_num, op->nb_sectors, ret);
+    trace_mirror_iteration_done(s, op->offset, op->bytes, ret);

    s->in_flight--;
-    s->sectors_in_flight -= op->nb_sectors;
+    s->bytes_in_flight -= op->bytes;
    iov = op->qiov.iov;
    for (i = 0; i < op->qiov.niov; i++) {
        MirrorBuffer *buf = (MirrorBuffer *) iov[i].iov_base;
@@ -114,16 +113,15 @@ static void mirror_iteration_done(MirrorOp *op, int ret)
        s->buf_free_count++;
    }

-    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
-    chunk_num = op->sector_num / sectors_per_chunk;
-    nb_chunks = DIV_ROUND_UP(op->nb_sectors, sectors_per_chunk);
+    chunk_num = op->offset / s->granularity;
+    nb_chunks = DIV_ROUND_UP(op->bytes, s->granularity);
    bitmap_clear(s->in_flight_bitmap, chunk_num, nb_chunks);
    if (ret >= 0) {
        if (s->cow_bitmap) {
            bitmap_set(s->cow_bitmap, chunk_num, nb_chunks);
        }
        if (!s->initial_zeroing_ongoing) {
-            s->common.offset += (uint64_t)op->nb_sectors * BDRV_SECTOR_SIZE;
+            s->common.offset += op->bytes;
        }
    }
    qemu_iovec_destroy(&op->qiov);
@@ -143,7 +141,8 @@ static void mirror_write_complete(void *opaque, int ret)
    if (ret < 0) {
        BlockErrorAction action;

-        bdrv_set_dirty_bitmap(s->dirty_bitmap, op->sector_num, op->nb_sectors);
+        bdrv_set_dirty_bitmap(s->dirty_bitmap, op->offset >> BDRV_SECTOR_BITS,
+                              op->bytes >> BDRV_SECTOR_BITS);
        action = mirror_error_action(s, false, -ret);
        if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) {
            s->ret = ret;
@@ -162,7 +161,8 @@ static void mirror_read_complete(void *opaque, int ret)
    if (ret < 0) {
        BlockErrorAction action;

-        bdrv_set_dirty_bitmap(s->dirty_bitmap, op->sector_num, op->nb_sectors);
+        bdrv_set_dirty_bitmap(s->dirty_bitmap, op->offset >> BDRV_SECTOR_BITS,
+                              op->bytes >> BDRV_SECTOR_BITS);
        action = mirror_error_action(s, true, -ret);
        if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) {
            s->ret = ret;
@@ -170,56 +170,53 @@ static void mirror_read_complete(void *opaque, int ret)

        mirror_iteration_done(op, ret);
    } else {
-        blk_aio_pwritev(s->target, op->sector_num * BDRV_SECTOR_SIZE, &op->qiov,
+        blk_aio_pwritev(s->target, op->offset, &op->qiov,
                        0, mirror_write_complete, op);
    }
    aio_context_release(blk_get_aio_context(s->common.blk));
 }

-static inline void mirror_clip_sectors(MirrorBlockJob *s,
-                                       int64_t sector_num,
-                                       int *nb_sectors)
+/* Clip bytes relative to offset to not exceed end-of-file */
+static inline int64_t mirror_clip_bytes(MirrorBlockJob *s,
+                                        int64_t offset,
+                                        int64_t bytes)
 {
-    *nb_sectors = MIN(*nb_sectors,
-                      s->bdev_length / BDRV_SECTOR_SIZE - sector_num);
+    return MIN(bytes, s->bdev_length - offset);
 }

-/* Round sector_num and/or nb_sectors to target cluster if COW is needed, and
- * return the offset of the adjusted tail sector against original. */
-static int mirror_cow_align(MirrorBlockJob *s,
-                            int64_t *sector_num,
-                            int *nb_sectors)
+/* Round offset and/or bytes to target cluster if COW is needed, and
+ * return the offset of the adjusted tail against original. */
+static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset,
+                            uint64_t *bytes)
 {
    bool need_cow;
    int ret = 0;
-    int chunk_sectors = s->granularity >> BDRV_SECTOR_BITS;
-    int64_t align_sector_num = *sector_num;
-    int align_nb_sectors = *nb_sectors;
-    int max_sectors = chunk_sectors * s->max_iov;
+    int64_t align_offset = *offset;
+    unsigned int align_bytes = *bytes;
+    int max_bytes = s->granularity * s->max_iov;

-    need_cow = !test_bit(*sector_num / chunk_sectors, s->cow_bitmap);
-    need_cow |= !test_bit((*sector_num + *nb_sectors - 1) / chunk_sectors,
+    assert(*bytes < INT_MAX);
+    need_cow = !test_bit(*offset / s->granularity, s->cow_bitmap);
+    need_cow |= !test_bit((*offset + *bytes - 1) / s->granularity,
                          s->cow_bitmap);
    if (need_cow) {
-        bdrv_round_sectors_to_clusters(blk_bs(s->target), *sector_num,
-                                       *nb_sectors, &align_sector_num,
-                                       &align_nb_sectors);
+        bdrv_round_to_clusters(blk_bs(s->target), *offset, *bytes,
+                               &align_offset, &align_bytes);
    }

-    if (align_nb_sectors > max_sectors) {
-        align_nb_sectors = max_sectors;
+    if (align_bytes > max_bytes) {
+        align_bytes = max_bytes;
        if (need_cow) {
-            align_nb_sectors = QEMU_ALIGN_DOWN(align_nb_sectors,
-                                               s->target_cluster_sectors);
+            align_bytes = QEMU_ALIGN_DOWN(align_bytes, s->target_cluster_size);
        }
    }
-    /* Clipping may result in align_nb_sectors unaligned to chunk boundary, but
+    /* Clipping may result in align_bytes unaligned to chunk boundary, but
     * that doesn't matter because it's already the end of source image. */
-    mirror_clip_sectors(s, align_sector_num, &align_nb_sectors);
+    align_bytes = mirror_clip_bytes(s, align_offset, align_bytes);

-    ret = align_sector_num + align_nb_sectors - (*sector_num + *nb_sectors);
-    *sector_num = align_sector_num;
-    *nb_sectors = align_nb_sectors;
+    ret = align_offset + align_bytes - (*offset + *bytes);
+    *offset = align_offset;
+    *bytes = align_bytes;
    assert(ret >= 0);
    return ret;
 }
@@ -233,50 +230,51 @@ static inline void mirror_wait_for_io(MirrorBlockJob *s)
 }

 /* Submit async read while handling COW.
- * Returns: The number of sectors copied after and including sector_num,
- *          excluding any sectors copied prior to sector_num due to alignment.
- *          This will be nb_sectors if no alignment is necessary, or
- *          (new_end - sector_num) if tail is rounded up or down due to
+ * Returns: The number of bytes copied after and including offset,
+ *          excluding any bytes copied prior to offset due to alignment.
+ *          This will be @bytes if no alignment is necessary, or
+ *          (new_end - offset) if tail is rounded up or down due to
 *          alignment or buffer limit.
 */
-static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num,
-                          int nb_sectors)
+static uint64_t mirror_do_read(MirrorBlockJob *s, int64_t offset,
+                               uint64_t bytes)
 {
    BlockBackend *source = s->common.blk;
-    int sectors_per_chunk, nb_chunks;
-    int ret;
+    int nb_chunks;
+    uint64_t ret;
    MirrorOp *op;
-    int max_sectors;
+    uint64_t max_bytes;

-    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
-    max_sectors = sectors_per_chunk * s->max_iov;
+    max_bytes = s->granularity * s->max_iov;

    /* We can only handle as much as buf_size at a time. */
-    nb_sectors = MIN(s->buf_size >> BDRV_SECTOR_BITS, nb_sectors);
-    nb_sectors = MIN(max_sectors, nb_sectors);
-    assert(nb_sectors);
-    ret = nb_sectors;
+    bytes = MIN(s->buf_size, MIN(max_bytes, bytes));
+    assert(bytes);
+    assert(bytes < BDRV_REQUEST_MAX_BYTES);
+    ret = bytes;

    if (s->cow_bitmap) {
-        ret += mirror_cow_align(s, &sector_num, &nb_sectors);
+        ret += mirror_cow_align(s, &offset, &bytes);
    }
-    assert(nb_sectors << BDRV_SECTOR_BITS <= s->buf_size);
-    /* The sector range must meet granularity because:
+    assert(bytes <= s->buf_size);
+    /* The offset is granularity-aligned because:
     * 1) Caller passes in aligned values;
     * 2) mirror_cow_align is used only when target cluster is larger. */
-    assert(!(sector_num % sectors_per_chunk));
-    nb_chunks = DIV_ROUND_UP(nb_sectors, sectors_per_chunk);
+    assert(QEMU_IS_ALIGNED(offset, s->granularity));
+    /* The range is sector-aligned, since bdrv_getlength() rounds up. */
+    assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE));
+    nb_chunks = DIV_ROUND_UP(bytes, s->granularity);

    while (s->buf_free_count < nb_chunks) {
-        trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
+        trace_mirror_yield_in_flight(s, offset, s->in_flight);
        mirror_wait_for_io(s);
    }

    /* Allocate a MirrorOp that is used as an AIO callback.  */
    op = g_new(MirrorOp, 1);
    op->s = s;
-    op->sector_num = sector_num;
-    op->nb_sectors = nb_sectors;
+    op->offset = offset;
+    op->bytes = bytes;

    /* Now make a QEMUIOVector taking enough granularity-sized chunks
     * from s->buf_free.
@@ -284,7 +282,7 @@ static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num,
    qemu_iovec_init(&op->qiov, nb_chunks);
    while (nb_chunks-- > 0) {
        MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free);
-        size_t remaining = nb_sectors * BDRV_SECTOR_SIZE - op->qiov.size;
+        size_t remaining = bytes - op->qiov.size;

        QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next);
        s->buf_free_count--;
@@ -293,17 +291,16 @@ static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num,

    /* Copy the dirty cluster.  */
    s->in_flight++;
-    s->sectors_in_flight += nb_sectors;
-    trace_mirror_one_iteration(s, sector_num, nb_sectors);
+    s->bytes_in_flight += bytes;
+    trace_mirror_one_iteration(s, offset, bytes);

-    blk_aio_preadv(source, sector_num * BDRV_SECTOR_SIZE, &op->qiov, 0,
-                   mirror_read_complete, op);
+    blk_aio_preadv(source, offset, &op->qiov, 0, mirror_read_complete, op);
    return ret;
 }

 static void mirror_do_zero_or_discard(MirrorBlockJob *s,
-                                      int64_t sector_num,
-                                      int nb_sectors,
+                                      int64_t offset,
+                                      uint64_t bytes,
                                      bool is_discard)
 {
    MirrorOp *op;
@@ -312,19 +309,17 @@ static void mirror_do_zero_or_discard(MirrorBlockJob *s,
     * so the freeing in mirror_iteration_done is nop. */
    op = g_new0(MirrorOp, 1);
    op->s = s;
-    op->sector_num = sector_num;
-    op->nb_sectors = nb_sectors;
+    op->offset = offset;
+    op->bytes = bytes;

    s->in_flight++;
-    s->sectors_in_flight += nb_sectors;
+    s->bytes_in_flight += bytes;
    if (is_discard) {
-        blk_aio_pdiscard(s->target, sector_num << BDRV_SECTOR_BITS,
-                         op->nb_sectors << BDRV_SECTOR_BITS,
-                         mirror_write_complete, op);
+        blk_aio_pdiscard(s->target, offset,
+                         op->bytes, mirror_write_complete, op);
    } else {
-        blk_aio_pwrite_zeroes(s->target, sector_num * BDRV_SECTOR_SIZE,
-                              op->nb_sectors * BDRV_SECTOR_SIZE,
-                              s->unmap ? BDRV_REQ_MAY_UNMAP : 0,
+        blk_aio_pwrite_zeroes(s->target, offset,
+                              op->bytes, s->unmap ? BDRV_REQ_MAY_UNMAP : 0,
                              mirror_write_complete, op);
    }
 }
@@ -332,29 +327,28 @@ static void mirror_do_zero_or_discard(MirrorBlockJob *s,
 static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
 {
    BlockDriverState *source = s->source;
-    int64_t sector_num, first_chunk;
+    int64_t offset, first_chunk;
    uint64_t delay_ns = 0;
    /* At least the first dirty chunk is mirrored in one iteration. */
    int nb_chunks = 1;
-    int64_t end = s->bdev_length / BDRV_SECTOR_SIZE;
    int sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
    bool write_zeroes_ok = bdrv_can_write_zeroes_with_unmap(blk_bs(s->target));
-    int max_io_sectors = MAX((s->buf_size >> BDRV_SECTOR_BITS) / MAX_IN_FLIGHT,
-                             MAX_IO_SECTORS);
+    int max_io_bytes = MAX(s->buf_size / MAX_IN_FLIGHT, MAX_IO_BYTES);

    bdrv_dirty_bitmap_lock(s->dirty_bitmap);
-    sector_num = bdrv_dirty_iter_next(s->dbi);
-    if (sector_num < 0) {
+    offset = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE;
+    if (offset < 0) {
        bdrv_set_dirty_iter(s->dbi, 0);
-        sector_num = bdrv_dirty_iter_next(s->dbi);
-        trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
-        assert(sector_num >= 0);
+        offset = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE;
+        trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap) *
+                                  BDRV_SECTOR_SIZE);
+        assert(offset >= 0);
    }
    bdrv_dirty_bitmap_unlock(s->dirty_bitmap);

-    first_chunk = sector_num / sectors_per_chunk;
+    first_chunk = offset / s->granularity;
    while (test_bit(first_chunk, s->in_flight_bitmap)) {
-        trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
+        trace_mirror_yield_in_flight(s, offset, s->in_flight);
        mirror_wait_for_io(s);
    }

@@ -363,25 +357,26 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
    /* Find the number of consective dirty chunks following the first dirty
     * one, and wait for in flight requests in them. */
    bdrv_dirty_bitmap_lock(s->dirty_bitmap);
-    while (nb_chunks * sectors_per_chunk < (s->buf_size >> BDRV_SECTOR_BITS)) {
+    while (nb_chunks * s->granularity < s->buf_size) {
        int64_t next_dirty;
-        int64_t next_sector = sector_num + nb_chunks * sectors_per_chunk;
-        int64_t next_chunk = next_sector / sectors_per_chunk;
-        if (next_sector >= end ||
-            !bdrv_get_dirty_locked(source, s->dirty_bitmap, next_sector)) {
+        int64_t next_offset = offset + nb_chunks * s->granularity;
+        int64_t next_chunk = next_offset / s->granularity;
+        if (next_offset >= s->bdev_length ||
+            !bdrv_get_dirty_locked(source, s->dirty_bitmap,
+                                   next_offset >> BDRV_SECTOR_BITS)) {
            break;
        }
        if (test_bit(next_chunk, s->in_flight_bitmap)) {
            break;
        }

-        next_dirty = bdrv_dirty_iter_next(s->dbi);
-        if (next_dirty > next_sector || next_dirty < 0) {
+        next_dirty = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE;
+        if (next_dirty > next_offset || next_dirty < 0) {
            /* The bitmap iterator's cache is stale, refresh it */
-            bdrv_set_dirty_iter(s->dbi, next_sector);
-            next_dirty = bdrv_dirty_iter_next(s->dbi);
+            bdrv_set_dirty_iter(s->dbi, next_offset >> BDRV_SECTOR_BITS);
+            next_dirty = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE;
        }
-        assert(next_dirty == next_sector);
+        assert(next_dirty == next_offset);
        nb_chunks++;
    }

@@ -389,14 +384,16 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
     * calling bdrv_get_block_status_above could yield - if some blocks are
     * marked dirty in this window, we need to know.
     */
-    bdrv_reset_dirty_bitmap_locked(s->dirty_bitmap, sector_num,
-                                  nb_chunks * sectors_per_chunk);
+    bdrv_reset_dirty_bitmap_locked(s->dirty_bitmap, offset >> BDRV_SECTOR_BITS,
+                                   nb_chunks * sectors_per_chunk);
    bdrv_dirty_bitmap_unlock(s->dirty_bitmap);

-    bitmap_set(s->in_flight_bitmap, sector_num / sectors_per_chunk, nb_chunks);
-    while (nb_chunks > 0 && sector_num < end) {
+    bitmap_set(s->in_flight_bitmap, offset / s->granularity, nb_chunks);
+    while (nb_chunks > 0 && offset < s->bdev_length) {
        int64_t ret;
-        int io_sectors, io_sectors_acct;
+        int io_sectors;
+        unsigned int io_bytes;
+        int64_t io_bytes_acct;
        BlockDriverState *file;
        enum MirrorMethod {
            MIRROR_METHOD_COPY,
@@ -404,27 +401,28 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
            MIRROR_METHOD_DISCARD
        } mirror_method = MIRROR_METHOD_COPY;

-        assert(!(sector_num % sectors_per_chunk));
-        ret = bdrv_get_block_status_above(source, NULL, sector_num,
+        assert(!(offset % s->granularity));
+        ret = bdrv_get_block_status_above(source, NULL,
+                                          offset >> BDRV_SECTOR_BITS,
                                          nb_chunks * sectors_per_chunk,
                                          &io_sectors, &file);
+        io_bytes = io_sectors * BDRV_SECTOR_SIZE;
        if (ret < 0) {
-            io_sectors = MIN(nb_chunks * sectors_per_chunk, max_io_sectors);
+            io_bytes = MIN(nb_chunks * s->granularity, max_io_bytes);
        } else if (ret & BDRV_BLOCK_DATA) {
-            io_sectors = MIN(io_sectors, max_io_sectors);
+            io_bytes = MIN(io_bytes, max_io_bytes);
        }

-        io_sectors -= io_sectors % sectors_per_chunk;
-        if (io_sectors < sectors_per_chunk) {
-            io_sectors = sectors_per_chunk;
+        io_bytes -= io_bytes % s->granularity;
+        if (io_bytes < s->granularity) {
+            io_bytes = s->granularity;
        } else if (ret >= 0 && !(ret & BDRV_BLOCK_DATA)) {
-            int64_t target_sector_num;
-            int target_nb_sectors;
-            bdrv_round_sectors_to_clusters(blk_bs(s->target), sector_num,
-                                           io_sectors,  &target_sector_num,
-                                           &target_nb_sectors);
-            if (target_sector_num == sector_num &&
-                target_nb_sectors == io_sectors) {
+            int64_t target_offset;
+            unsigned int target_bytes;
+            bdrv_round_to_clusters(blk_bs(s->target), offset, io_bytes,
+                                   &target_offset, &target_bytes);
+            if (target_offset == offset &&
+                target_bytes == io_bytes) {
                mirror_method = ret & BDRV_BLOCK_ZERO ?
                                    MIRROR_METHOD_ZERO :
                                    MIRROR_METHOD_DISCARD;
@@ -432,7 +430,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
        }

        while (s->in_flight >= MAX_IN_FLIGHT) {
-            trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
+            trace_mirror_yield_in_flight(s, offset, s->in_flight);
            mirror_wait_for_io(s);
        }

@@ -440,30 +438,29 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
            return 0;
        }

-        mirror_clip_sectors(s, sector_num, &io_sectors);
+        io_bytes = mirror_clip_bytes(s, offset, io_bytes);
        switch (mirror_method) {
        case MIRROR_METHOD_COPY:
-            io_sectors = mirror_do_read(s, sector_num, io_sectors);
-            io_sectors_acct = io_sectors;
+            io_bytes = io_bytes_acct = mirror_do_read(s, offset, io_bytes);
            break;
        case MIRROR_METHOD_ZERO:
        case MIRROR_METHOD_DISCARD:
-            mirror_do_zero_or_discard(s, sector_num, io_sectors,
+            mirror_do_zero_or_discard(s, offset, io_bytes,
                                      mirror_method == MIRROR_METHOD_DISCARD);
            if (write_zeroes_ok) {
-                io_sectors_acct = 0;
+                io_bytes_acct = 0;
            } else {
-                io_sectors_acct = io_sectors;
+                io_bytes_acct = io_bytes;
            }
            break;
        default:
            abort();
        }
-        assert(io_sectors);
-        sector_num += io_sectors;
-        nb_chunks -= DIV_ROUND_UP(io_sectors, sectors_per_chunk);
+        assert(io_bytes);
+        offset += io_bytes;
+        nb_chunks -= DIV_ROUND_UP(io_bytes, s->granularity);
        if (s->common.speed) {
-            delay_ns = ratelimit_calculate_delay(&s->limit, io_sectors_acct);
+            delay_ns = ratelimit_calculate_delay(&s->limit, io_bytes_acct);
        }
    }
    return delay_ns;
@@ -624,6 +621,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
    BlockDriverState *bs = s->source;
    BlockDriverState *target_bs = blk_bs(s->target);
    int ret, n;
+    int64_t count;

    end = s->bdev_length / BDRV_SECTOR_SIZE;

@@ -652,7 +650,8 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
                continue;
            }

-            mirror_do_zero_or_discard(s, sector_num, nb_sectors, false);
+            mirror_do_zero_or_discard(s, sector_num * BDRV_SECTOR_SIZE,
+                                      nb_sectors * BDRV_SECTOR_SIZE, false);
            sector_num += nb_sectors;
        }

@@ -672,11 +671,16 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
            return 0;
        }

-        ret = bdrv_is_allocated_above(bs, base, sector_num, nb_sectors, &n);
+        ret = bdrv_is_allocated_above(bs, base, sector_num * BDRV_SECTOR_SIZE,
+                                      nb_sectors * BDRV_SECTOR_SIZE, &count);
        if (ret < 0) {
            return ret;
        }

+        /* TODO: Relax this once bdrv_is_allocated_above and dirty
+         * bitmaps no longer require sector alignment. */
+        assert(QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE));
+        n = count >> BDRV_SECTOR_BITS;
        assert(n > 0);
        if (ret == 1) {
            bdrv_set_dirty_bitmap(s->dirty_bitmap, sector_num, n);
@@ -712,7 +716,6 @@ static void coroutine_fn mirror_run(void *opaque)
    char backing_filename[2]; /* we only need 2 characters because we are only
                                 checking for a NULL string */
    int ret = 0;
-    int target_cluster_size = BDRV_SECTOR_SIZE;

    if (block_job_is_cancelled(&s->common)) {
        goto immediate_exit;
@@ -736,7 +739,8 @@ static void coroutine_fn mirror_run(void *opaque)
        }

        if (s->bdev_length > base_length) {
-            ret = blk_truncate(s->target, s->bdev_length, NULL);
+            ret = blk_truncate(s->target, s->bdev_length, PREALLOC_MODE_OFF,
+                               NULL);
            if (ret < 0) {
                goto immediate_exit;
            }
@@ -764,14 +768,15 @@ static void coroutine_fn mirror_run(void *opaque)
    bdrv_get_backing_filename(target_bs, backing_filename,
                              sizeof(backing_filename));
    if (!bdrv_get_info(target_bs, &bdi) && bdi.cluster_size) {
-        target_cluster_size = bdi.cluster_size;
+        s->target_cluster_size = bdi.cluster_size;
+    } else {
+        s->target_cluster_size = BDRV_SECTOR_SIZE;
    }
-    if (backing_filename[0] && !target_bs->backing
-        && s->granularity < target_cluster_size) {
-        s->buf_size = MAX(s->buf_size, target_cluster_size);
+    if (backing_filename[0] && !target_bs->backing &&
+        s->granularity < s->target_cluster_size) {
+        s->buf_size = MAX(s->buf_size, s->target_cluster_size);
        s->cow_bitmap = bitmap_new(length);
    }
-    s->target_cluster_sectors = target_cluster_size >> BDRV_SECTOR_BITS;
    s->max_iov = MIN(bs->bl.max_iov, target_bs->bl.max_iov);

    s->buf = qemu_try_blockalign(bs, s->buf_size);
@@ -807,10 +812,10 @@ static void coroutine_fn mirror_run(void *opaque)
        cnt = bdrv_get_dirty_count(s->dirty_bitmap);
        /* s->common.offset contains the number of bytes already processed so
         * far, cnt is the number of dirty sectors remaining and
-         * s->sectors_in_flight is the number of sectors currently being
+         * s->bytes_in_flight is the number of bytes currently being
         * processed; together those are the current total operation length */
-        s->common.len = s->common.offset +
-                        (cnt + s->sectors_in_flight) * BDRV_SECTOR_SIZE;
+        s->common.len = s->common.offset + s->bytes_in_flight +
+            cnt * BDRV_SECTOR_SIZE;

        /* Note that even when no rate limit is applied we need to yield
         * periodically with no pending I/O so that bdrv_drain_all() returns.
@@ -822,7 +827,8 @@ static void coroutine_fn mirror_run(void *opaque)
            s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
            if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 ||
                (cnt == 0 && s->in_flight > 0)) {
-                trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight);
+                trace_mirror_yield(s, cnt * BDRV_SECTOR_SIZE,
+                                   s->buf_free_count, s->in_flight);
                mirror_wait_for_io(s);
                continue;
            } else if (cnt != 0) {
@@ -863,7 +869,7 @@ static void coroutine_fn mirror_run(void *opaque)
             * whether to switch to target check one last time if I/O has
             * come in the meanwhile, and if not flush the data to disk.
             */
-            trace_mirror_before_drain(s, cnt);
+            trace_mirror_before_drain(s, cnt * BDRV_SECTOR_SIZE);

            bdrv_drained_begin(bs);
            cnt = bdrv_get_dirty_count(s->dirty_bitmap);
@@ -882,7 +888,8 @@ static void coroutine_fn mirror_run(void *opaque)
        }

        ret = 0;
-        trace_mirror_before_sleep(s, cnt, s->synced, delay_ns);
+        trace_mirror_before_sleep(s, cnt * BDRV_SECTOR_SIZE,
+                                  s->synced, delay_ns);
        if (!s->synced) {
            block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
            if (block_job_is_cancelled(&s->common)) {
@@ -929,7 +936,7 @@ static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp)
        error_setg(errp, QERR_INVALID_PARAMETER, "speed");
        return;
    }
-    ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
+    ratelimit_set_speed(&s->limit, speed, SLICE_TIME);
 }

 static void mirror_complete(BlockJob *job, Error **errp)
@@ -1058,20 +1065,20 @@ static int64_t coroutine_fn bdrv_mirror_top_get_block_status(
 {
    *pnum = nb_sectors;
    *file = bs->backing->bs;
-    return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
+    return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID |
           (sector_num << BDRV_SECTOR_BITS);
 }

 static int coroutine_fn bdrv_mirror_top_pwrite_zeroes(BlockDriverState *bs,
-    int64_t offset, int count, BdrvRequestFlags flags)
+    int64_t offset, int bytes, BdrvRequestFlags flags)
 {
-    return bdrv_co_pwrite_zeroes(bs->backing, offset, count, flags);
+    return bdrv_co_pwrite_zeroes(bs->backing, offset, bytes, flags);
 }

 static int coroutine_fn bdrv_mirror_top_pdiscard(BlockDriverState *bs,
-    int64_t offset, int count)
+    int64_t offset, int bytes)
 {
-    return bdrv_co_pdiscard(bs->backing->bs, offset, count);
+    return bdrv_co_pdiscard(bs->backing->bs, offset, bytes);
 }

 static void bdrv_mirror_top_refresh_filename(BlockDriverState *bs, QDict *opts)
@@ -1141,6 +1148,8 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
    }

    assert ((granularity & (granularity - 1)) == 0);
+    /* Granularity must be large enough for sector-based dirty bitmap */
+    assert(granularity >= BDRV_SECTOR_SIZE);

    if (buf_size < 0) {
        error_setg(errp, "Invalid parameter 'buf-size'");
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -242,7 +242,7 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
    ssize_t ret;

    if (flags & BDRV_REQ_FUA) {
-        assert(client->nbdflags & NBD_FLAG_SEND_FUA);
+        assert(client->info.flags & NBD_FLAG_SEND_FUA);
        request.flags |= NBD_CMD_FLAG_FUA;
    }

@@ -259,23 +259,23 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
 }

 int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
-                                int count, BdrvRequestFlags flags)
+                                int bytes, BdrvRequestFlags flags)
 {
    ssize_t ret;
    NBDClientSession *client = nbd_get_client_session(bs);
    NBDRequest request = {
        .type = NBD_CMD_WRITE_ZEROES,
        .from = offset,
-        .len = count,
+        .len = bytes,
    };
    NBDReply reply;

-    if (!(client->nbdflags & NBD_FLAG_SEND_WRITE_ZEROES)) {
+    if (!(client->info.flags & NBD_FLAG_SEND_WRITE_ZEROES)) {
        return -ENOTSUP;
    }

    if (flags & BDRV_REQ_FUA) {
-        assert(client->nbdflags & NBD_FLAG_SEND_FUA);
+        assert(client->info.flags & NBD_FLAG_SEND_FUA);
        request.flags |= NBD_CMD_FLAG_FUA;
    }
    if (!(flags & BDRV_REQ_MAY_UNMAP)) {
@@ -299,7 +299,7 @@ int nbd_client_co_flush(BlockDriverState *bs)
    NBDReply reply;
    ssize_t ret;

-    if (!(client->nbdflags & NBD_FLAG_SEND_FLUSH)) {
+    if (!(client->info.flags & NBD_FLAG_SEND_FLUSH)) {
        return 0;
    }

@@ -316,18 +316,18 @@ int nbd_client_co_flush(BlockDriverState *bs)
    return -reply.error;
 }

-int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
+int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
 {
    NBDClientSession *client = nbd_get_client_session(bs);
    NBDRequest request = {
        .type = NBD_CMD_TRIM,
        .from = offset,
-        .len = count,
+        .len = bytes,
    };
    NBDReply reply;
    ssize_t ret;

-    if (!(client->nbdflags & NBD_FLAG_SEND_TRIM)) {
+    if (!(client->info.flags & NBD_FLAG_SEND_TRIM)) {
        return 0;
    }

@@ -345,14 +345,14 @@ int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
 void nbd_client_detach_aio_context(BlockDriverState *bs)
 {
    NBDClientSession *client = nbd_get_client_session(bs);
-    qio_channel_detach_aio_context(QIO_CHANNEL(client->sioc));
+    qio_channel_detach_aio_context(QIO_CHANNEL(client->ioc));
 }

 void nbd_client_attach_aio_context(BlockDriverState *bs,
                                   AioContext *new_context)
 {
    NBDClientSession *client = nbd_get_client_session(bs);
-    qio_channel_attach_aio_context(QIO_CHANNEL(client->sioc), new_context);
+    qio_channel_attach_aio_context(QIO_CHANNEL(client->ioc), new_context);
    aio_co_schedule(new_context, client->read_reply_co);
 }

@@ -384,22 +384,24 @@ int nbd_client_init(BlockDriverState *bs,
    logout("session init %s\n", export);
    qio_channel_set_blocking(QIO_CHANNEL(sioc), true, NULL);

+    client->info.request_sizes = true;
    ret = nbd_receive_negotiate(QIO_CHANNEL(sioc), export,
-                                &client->nbdflags,
                                tlscreds, hostname,
-                                &client->ioc,
-                                &client->size, errp);
+                                &client->ioc, &client->info, errp);
    if (ret < 0) {
        logout("Failed to negotiate with the NBD server\n");
        return ret;
    }
-    if (client->nbdflags & NBD_FLAG_SEND_FUA) {
+    if (client->info.flags & NBD_FLAG_SEND_FUA) {
        bs->supported_write_flags = BDRV_REQ_FUA;
        bs->supported_zero_flags |= BDRV_REQ_FUA;
    }
-    if (client->nbdflags & NBD_FLAG_SEND_WRITE_ZEROES) {
+    if (client->info.flags & NBD_FLAG_SEND_WRITE_ZEROES) {
        bs->supported_zero_flags |= BDRV_REQ_MAY_UNMAP;
    }
+    if (client->info.min_block > bs->bl.request_alignment) {
+        bs->bl.request_alignment = client->info.min_block;
+    }

    qemu_co_mutex_init(&client->send_mutex);
    qemu_co_queue_init(&client->free_sema);
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -20,8 +20,7 @@
 typedef struct NBDClientSession {
    QIOChannelSocket *sioc; /* The master data channel */
    QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
-    uint16_t nbdflags;
-    off_t size;
+    NBDExportInfo info;

    CoMutex send_mutex;
    CoQueue free_sema;
@@ -42,12 +41,12 @@ int nbd_client_init(BlockDriverState *bs,
                    Error **errp);
 void nbd_client_close(BlockDriverState *bs);

-int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count);
+int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes);
 int nbd_client_co_flush(BlockDriverState *bs);
 int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
                          uint64_t bytes, QEMUIOVector *qiov, int flags);
 int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
-                                int count, BdrvRequestFlags flags);
+                                int bytes, BdrvRequestFlags flags);
 int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
                         uint64_t bytes, QEMUIOVector *qiov, int flags);

--- a/block/nbd.c
+++ b/block/nbd.c
@@ -64,11 +64,11 @@ static int nbd_parse_uri(const char *filename, QDict *options)
    }

    /* transport */
-    if (!strcmp(uri->scheme, "nbd")) {
+    if (!g_strcmp0(uri->scheme, "nbd")) {
        is_unix = false;
-    } else if (!strcmp(uri->scheme, "nbd+tcp")) {
+    } else if (!g_strcmp0(uri->scheme, "nbd+tcp")) {
        is_unix = false;
-    } else if (!strcmp(uri->scheme, "nbd+unix")) {
+    } else if (!g_strcmp0(uri->scheme, "nbd+unix")) {
        is_unix = true;
    } else {
        ret = -EINVAL;
@@ -472,9 +472,17 @@ static int nbd_co_flush(BlockDriverState *bs)

 static void nbd_refresh_limits(BlockDriverState *bs, Error **errp)
 {
-    bs->bl.max_pdiscard = NBD_MAX_BUFFER_SIZE;
-    bs->bl.max_pwrite_zeroes = NBD_MAX_BUFFER_SIZE;
-    bs->bl.max_transfer = NBD_MAX_BUFFER_SIZE;
+    NBDClientSession *s = nbd_get_client_session(bs);
+    uint32_t max = MIN_NON_ZERO(NBD_MAX_BUFFER_SIZE, s->info.max_block);
+
+    bs->bl.max_pdiscard = max;
+    bs->bl.max_pwrite_zeroes = max;
+    bs->bl.max_transfer = max;
+
+    if (s->info.opt_block &&
+        s->info.opt_block > bs->bl.opt_transfer) {
+        bs->bl.opt_transfer = s->info.opt_block;
+    }
 }

 static void nbd_close(BlockDriverState *bs)
@@ -492,7 +500,7 @@ static int64_t nbd_getlength(BlockDriverState *bs)
 {
    BDRVNBDState *s = bs->opaque;

-    return s->client.size;
+    return s->client.info.size;
 }

 static void nbd_detach_aio_context(BlockDriverState *bs)
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -82,7 +82,7 @@ static int nfs_parse_uri(const char *filename, QDict *options, Error **errp)
        error_setg(errp, "Invalid URI specified");
        goto out;
    }
-    if (strcmp(uri->scheme, "nfs") != 0) {
+    if (g_strcmp0(uri->scheme, "nfs") != 0) {
        error_setg(errp, "URI scheme must be 'nfs'");
        goto out;
    }
@@ -558,8 +558,8 @@ static int64_t nfs_client_open(NFSClient *client, QDict *options,
        }
        client->readahead = qemu_opt_get_number(opts, "readahead-size", 0);
        if (client->readahead > QEMU_NFS_MAX_READAHEAD_SIZE) {
-            error_report("NFS Warning: Truncating NFS readahead "
-                         "size to %d", QEMU_NFS_MAX_READAHEAD_SIZE);
+            warn_report("Truncating NFS readahead size to %d",
+                        QEMU_NFS_MAX_READAHEAD_SIZE);
            client->readahead = QEMU_NFS_MAX_READAHEAD_SIZE;
        }
        nfs_set_readahead(client->context, client->readahead);
@@ -579,8 +579,8 @@ static int64_t nfs_client_open(NFSClient *client, QDict *options,
        }
        client->pagecache = qemu_opt_get_number(opts, "page-cache-size", 0);
        if (client->pagecache > QEMU_NFS_MAX_PAGECACHE_SIZE) {
-            error_report("NFS Warning: Truncating NFS pagecache "
-                         "size to %d pages", QEMU_NFS_MAX_PAGECACHE_SIZE);
+            warn_report("Truncating NFS pagecache size to %d pages",
+                        QEMU_NFS_MAX_PAGECACHE_SIZE);
            client->pagecache = QEMU_NFS_MAX_PAGECACHE_SIZE;
        }
        nfs_set_pagecache(client->context, client->pagecache);
@@ -595,8 +595,8 @@ static int64_t nfs_client_open(NFSClient *client, QDict *options,
        /* limit the maximum debug level to avoid potential flooding
         * of our log files. */
        if (client->debug > QEMU_NFS_MAX_DEBUG_LEVEL) {
-            error_report("NFS Warning: Limiting NFS debug level "
-                         "to %d", QEMU_NFS_MAX_DEBUG_LEVEL);
+            warn_report("Limiting NFS debug level to %d",
+                        QEMU_NFS_MAX_DEBUG_LEVEL);
            client->debug = QEMU_NFS_MAX_DEBUG_LEVEL;
        }
        nfs_set_debug(client->context, client->debug);
@@ -759,11 +759,18 @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
    return (task.ret < 0 ? task.ret : st.st_blocks * 512);
 }

-static int nfs_file_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
+static int nfs_file_truncate(BlockDriverState *bs, int64_t offset,
+                             PreallocMode prealloc, Error **errp)
 {
    NFSClient *client = bs->opaque;
    int ret;

+    if (prealloc != PREALLOC_MODE_OFF) {
+        error_setg(errp, "Unsupported preallocation mode '%s'",
+                   PreallocMode_lookup[prealloc]);
+        return -ENOTSUP;
+    }
+
    ret = nfs_ftruncate(client->context, client->fh, offset);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Failed to truncate file");
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -224,7 +224,7 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
        } else {
            ret = bdrv_truncate(bs->file,
                                (s->data_end + space) << BDRV_SECTOR_BITS,
-                                NULL);
+                                PREALLOC_MODE_OFF, NULL);
        }
        if (ret < 0) {
            return ret;
@@ -458,7 +458,8 @@ static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res,
        res->leaks += count;
        if (fix & BDRV_FIX_LEAKS) {
            Error *local_err = NULL;
-            ret = bdrv_truncate(bs->file, res->image_end_offset, &local_err);
+            ret = bdrv_truncate(bs->file, res->image_end_offset,
+                                PREALLOC_MODE_OFF, &local_err);
            if (ret < 0) {
                error_report_err(local_err);
                res->check_errors++;
@@ -507,7 +508,7 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)

    blk_set_allow_write_beyond_eof(file, true);

-    ret = blk_truncate(file, 0, errp);
+    ret = blk_truncate(file, 0, PREALLOC_MODE_OFF, errp);
    if (ret < 0) {
        goto exit;
    }
@@ -699,7 +700,8 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
    }

    if (!(flags & BDRV_O_RESIZE) || !bdrv_has_zero_init(bs->file->bs) ||
-            bdrv_truncate(bs->file, bdrv_getlength(bs->file->bs), NULL) != 0) {
+            bdrv_truncate(bs->file, bdrv_getlength(bs->file->bs),
+                          PREALLOC_MODE_OFF, NULL) != 0) {
        s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE;
    }

@@ -742,7 +744,8 @@ static void parallels_close(BlockDriverState *bs)
    }

    if (bs->open_flags & BDRV_O_RDWR) {
-        bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS, NULL);
+        bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS,
+                      PREALLOC_MODE_OFF, NULL);
    }

    g_free(s->bat_dirty_bmap);
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -45,7 +45,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
    info->ro                     = bs->read_only;
    info->drv                    = g_strdup(bs->drv->format_name);
    info->encrypted              = bs->encrypted;
-    info->encryption_key_missing = bdrv_key_required(bs);
+    info->encryption_key_missing = false;

    info->cache = g_new(BlockdevCacheInfo, 1);
    *info->cache = (BlockdevCacheInfo) {
@@ -322,11 +322,21 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
 {
    BlockInfo *info = g_malloc0(sizeof(*info));
    BlockDriverState *bs = blk_bs(blk);
+    char *qdev;
+
    info->device = g_strdup(blk_name(blk));
    info->type = g_strdup("unknown");
    info->locked = blk_dev_is_medium_locked(blk);
    info->removable = blk_dev_has_removable_media(blk);

+    qdev = blk_get_attached_dev_id(blk);
+    if (qdev && *qdev) {
+        info->has_qdev = true;
+        info->qdev = qdev;
+    } else {
+        g_free(qdev);
+    }
+
    if (blk_dev_has_tray(blk)) {
        info->has_tray_open = true;
        info->tray_open = blk_dev_is_tray_open(blk);
@@ -462,8 +472,14 @@ BlockInfoList *qmp_query_block(Error **errp)
    BlockBackend *blk;
    Error *local_err = NULL;

-    for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
-        BlockInfoList *info = g_malloc0(sizeof(*info));
+    for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) {
+        BlockInfoList *info;
+
+        if (!*blk_name(blk) && !blk_get_attached_dev(blk)) {
+            continue;
+        }
+
+        info = g_malloc0(sizeof(*info));
        bdrv_query_info(blk, &info->value, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -31,8 +31,10 @@
 #include "qemu/bswap.h"
 #include <zlib.h>
 #include "qapi/qmp/qerror.h"
-#include "crypto/cipher.h"
+#include "qapi/qmp/qstring.h"
+#include "crypto/block.h"
 #include "migration/blocker.h"
+#include "block/crypto.h"

 /**************************************************************/
 /* QEMU COW block driver with compression and encryption support */
@@ -77,7 +79,7 @@ typedef struct BDRVQcowState {
    uint8_t *cluster_cache;
    uint8_t *cluster_data;
    uint64_t cluster_cache_offset;
-    QCryptoCipher *cipher; /* NULL if no key yet */
+    QCryptoBlock *crypto; /* Disk encryption format driver */
    uint32_t crypt_method_header;
    CoMutex lock;
    Error *migration_blocker;
@@ -97,6 +99,15 @@ static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
        return 0;
 }

+static QemuOptsList qcow_runtime_opts = {
+    .name = "qcow",
+    .head = QTAILQ_HEAD_INITIALIZER(qcow_runtime_opts.head),
+    .desc = {
+        BLOCK_CRYPTO_OPT_DEF_QCOW_KEY_SECRET("encrypt."),
+        { /* end of list */ }
+    },
+};
+
 static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
                     Error **errp)
 {
@@ -105,11 +116,19 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
    int ret;
    QCowHeader header;
    Error *local_err = NULL;
+    QCryptoBlockOpenOptions *crypto_opts = NULL;
+    unsigned int cflags = 0;
+    QDict *encryptopts = NULL;
+    const char *encryptfmt;
+
+    qdict_extract_subqdict(options, &encryptopts, "encrypt.");
+    encryptfmt = qdict_get_try_str(encryptopts, "format");

    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
                               false, errp);
    if (!bs->file) {
-        return -EINVAL;
+        ret = -EINVAL;
+        goto fail;
    }

    ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
@@ -155,17 +174,6 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
        goto fail;
    }

-    if (header.crypt_method > QCOW_CRYPT_AES) {
-        error_setg(errp, "invalid encryption method in qcow header");
-        ret = -EINVAL;
-        goto fail;
-    }
-    if (!qcrypto_cipher_supports(QCRYPTO_CIPHER_ALG_AES_128,
-                                 QCRYPTO_CIPHER_MODE_CBC)) {
-        error_setg(errp, "AES cipher not available");
-        ret = -EINVAL;
-        goto fail;
-    }
    s->crypt_method_header = header.crypt_method;
    if (s->crypt_method_header) {
        if (bdrv_uses_whitelist() &&
@@ -181,8 +189,44 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
            ret = -ENOSYS;
            goto fail;
        }
+        if (s->crypt_method_header == QCOW_CRYPT_AES) {
+            if (encryptfmt && !g_str_equal(encryptfmt, "aes")) {
+                error_setg(errp,
+                           "Header reported 'aes' encryption format but "
+                           "options specify '%s'", encryptfmt);
+                ret = -EINVAL;
+                goto fail;
+            }
+            qdict_del(encryptopts, "format");
+            crypto_opts = block_crypto_open_opts_init(
+                Q_CRYPTO_BLOCK_FORMAT_QCOW, encryptopts, errp);
+            if (!crypto_opts) {
+                ret = -EINVAL;
+                goto fail;
+            }

+            if (flags & BDRV_O_NO_IO) {
+                cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
+            }
+            s->crypto = qcrypto_block_open(crypto_opts, "encrypt.",
+                                           NULL, NULL, cflags, errp);
+            if (!s->crypto) {
+                ret = -EINVAL;
+                goto fail;
+            }
+        } else {
+            error_setg(errp, "invalid encryption method in qcow header");
+            ret = -EINVAL;
+            goto fail;
+        }
        bs->encrypted = true;
+    } else {
+        if (encryptfmt) {
+            error_setg(errp, "No encryption in image header, but options "
+                       "specified format '%s'", encryptfmt);
+            ret = -EINVAL;
+            goto fail;
+        }
    }
    s->cluster_bits = header.cluster_bits;
    s->cluster_size = 1 << s->cluster_bits;
@@ -266,6 +310,8 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
        goto fail;
    }

+    QDECREF(encryptopts);
+    qapi_free_QCryptoBlockOpenOptions(crypto_opts);
    qemu_co_mutex_init(&s->lock);
    return 0;

@@ -274,6 +320,9 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
    qemu_vfree(s->l2_cache);
    g_free(s->cluster_cache);
    g_free(s->cluster_data);
+    qcrypto_block_free(s->crypto);
+    QDECREF(encryptopts);
+    qapi_free_QCryptoBlockOpenOptions(crypto_opts);
    return ret;
 }

@@ -286,85 +335,6 @@ static int qcow_reopen_prepare(BDRVReopenState *state,
    return 0;
 }

-static int qcow_set_key(BlockDriverState *bs, const char *key)
-{
-    BDRVQcowState *s = bs->opaque;
-    uint8_t keybuf[16];
-    int len, i;
-    Error *err;
-
-    memset(keybuf, 0, 16);
-    len = strlen(key);
-    if (len > 16)
-        len = 16;
-    /* XXX: we could compress the chars to 7 bits to increase
-       entropy */
-    for(i = 0;i < len;i++) {
-        keybuf[i] = key[i];
-    }
-    assert(bs->encrypted);
-
-    qcrypto_cipher_free(s->cipher);
-    s->cipher = qcrypto_cipher_new(
-        QCRYPTO_CIPHER_ALG_AES_128,
-        QCRYPTO_CIPHER_MODE_CBC,
-        keybuf, G_N_ELEMENTS(keybuf),
-        &err);
-
-    if (!s->cipher) {
-        /* XXX would be nice if errors in this method could
-         * be properly propagate to the caller. Would need
-         * the bdrv_set_key() API signature to be fixed. */
-        error_free(err);
-        return -1;
-    }
-    return 0;
-}
-
-/* The crypt function is compatible with the linux cryptoloop
-   algorithm for < 4 GB images. NOTE: out_buf == in_buf is
-   supported */
-static int encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
-                           uint8_t *out_buf, const uint8_t *in_buf,
-                           int nb_sectors, bool enc, Error **errp)
-{
-    union {
-        uint64_t ll[2];
-        uint8_t b[16];
-    } ivec;
-    int i;
-    int ret;
-
-    for(i = 0; i < nb_sectors; i++) {
-        ivec.ll[0] = cpu_to_le64(sector_num);
-        ivec.ll[1] = 0;
-        if (qcrypto_cipher_setiv(s->cipher,
-                                 ivec.b, G_N_ELEMENTS(ivec.b),
-                                 errp) < 0) {
-            return -1;
-        }
-        if (enc) {
-            ret = qcrypto_cipher_encrypt(s->cipher,
-                                         in_buf,
-                                         out_buf,
-                                         512,
-                                         errp);
-        } else {
-            ret = qcrypto_cipher_decrypt(s->cipher,
-                                         in_buf,
-                                         out_buf,
-                                         512,
-                                         errp);
-        }
-        if (ret < 0) {
-            return -1;
-        }
-        sector_num++;
-        in_buf += 512;
-        out_buf += 512;
-    }
-    return 0;
-}

 /* 'allocate' is:
 *
@@ -473,22 +443,23 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
                /* round to cluster size */
                cluster_offset = (cluster_offset + s->cluster_size - 1) &
                    ~(s->cluster_size - 1);
-                bdrv_truncate(bs->file, cluster_offset + s->cluster_size, NULL);
+                bdrv_truncate(bs->file, cluster_offset + s->cluster_size,
+                              PREALLOC_MODE_OFF, NULL);
                /* if encrypted, we must initialize the cluster
                   content which won't be written */
                if (bs->encrypted &&
                    (n_end - n_start) < s->cluster_sectors) {
                    uint64_t start_sect;
-                    assert(s->cipher);
+                    assert(s->crypto);
                    start_sect = (offset & ~(s->cluster_size - 1)) >> 9;
-                    memset(s->cluster_data + 512, 0x00, 512);
                    for(i = 0; i < s->cluster_sectors; i++) {
                        if (i < n_start || i >= n_end) {
                            Error *err = NULL;
-                            if (encrypt_sectors(s, start_sect + i,
-                                                s->cluster_data,
-                                                s->cluster_data + 512, 1,
-                                                true, &err) < 0) {
+                            memset(s->cluster_data, 0x00, 512);
+                            if (qcrypto_block_encrypt(s->crypto, start_sect + i,
+                                                      s->cluster_data,
+                                                      BDRV_SECTOR_SIZE,
+                                                      &err) < 0) {
                                error_free(err);
                                errno = EIO;
                                return -1;
@@ -533,7 +504,7 @@ static int64_t coroutine_fn qcow_co_get_block_status(BlockDriverState *bs,
    if (!cluster_offset) {
        return 0;
    }
-    if ((cluster_offset & QCOW_OFLAG_COMPRESSED) || s->cipher) {
+    if ((cluster_offset & QCOW_OFLAG_COMPRESSED) || s->crypto) {
        return BDRV_BLOCK_DATA;
    }
    cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS);
@@ -664,9 +635,9 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
                break;
            }
            if (bs->encrypted) {
-                assert(s->cipher);
-                if (encrypt_sectors(s, sector_num, buf, buf,
-                                    n, false, &err) < 0) {
+                assert(s->crypto);
+                if (qcrypto_block_decrypt(s->crypto, sector_num, buf,
+                                          n * BDRV_SECTOR_SIZE, &err) < 0) {
                    goto fail;
                }
            }
@@ -700,9 +671,7 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
    BDRVQcowState *s = bs->opaque;
    int index_in_cluster;
    uint64_t cluster_offset;
-    const uint8_t *src_buf;
    int ret = 0, n;
-    uint8_t *cluster_data = NULL;
    struct iovec hd_iov;
    QEMUIOVector hd_qiov;
    uint8_t *buf;
@@ -710,7 +679,9 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,

    s->cluster_cache_offset = -1; /* disable compressed cache */

-    if (qiov->niov > 1) {
+    /* We must always copy the iov when encrypting, so we
+     * don't modify the original data buffer during encryption */
+    if (bs->encrypted || qiov->niov > 1) {
        buf = orig_buf = qemu_try_blockalign(bs, qiov->size);
        if (buf == NULL) {
            return -ENOMEM;
@@ -739,22 +710,16 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
        }
        if (bs->encrypted) {
            Error *err = NULL;
-            assert(s->cipher);
-            if (!cluster_data) {
-                cluster_data = g_malloc0(s->cluster_size);
-            }
-            if (encrypt_sectors(s, sector_num, cluster_data, buf,
-                                n, true, &err) < 0) {
+            assert(s->crypto);
+            if (qcrypto_block_encrypt(s->crypto, sector_num, buf,
+                                      n * BDRV_SECTOR_SIZE, &err) < 0) {
                error_free(err);
                ret = -EIO;
                break;
            }
-            src_buf = cluster_data;
-        } else {
-            src_buf = buf;
        }

-        hd_iov.iov_base = (void *)src_buf;
+        hd_iov.iov_base = (void *)buf;
        hd_iov.iov_len = n * 512;
        qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
        qemu_co_mutex_unlock(&s->lock);
@@ -773,10 +738,7 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
    }
    qemu_co_mutex_unlock(&s->lock);

-    if (qiov->niov > 1) {
-        qemu_vfree(orig_buf);
-    }
-    g_free(cluster_data);
+    qemu_vfree(orig_buf);

    return ret;
 }
@@ -785,8 +747,8 @@ static void qcow_close(BlockDriverState *bs)
 {
    BDRVQcowState *s = bs->opaque;

-    qcrypto_cipher_free(s->cipher);
-    s->cipher = NULL;
+    qcrypto_block_free(s->crypto);
+    s->crypto = NULL;
    g_free(s->l1_table);
    qemu_vfree(s->l2_cache);
    g_free(s->cluster_cache);
@@ -803,17 +765,35 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
    uint8_t *tmp;
    int64_t total_size = 0;
    char *backing_file = NULL;
-    int flags = 0;
    Error *local_err = NULL;
    int ret;
    BlockBackend *qcow_blk;
+    const char *encryptfmt = NULL;
+    QDict *options;
+    QDict *encryptopts = NULL;
+    QCryptoBlockCreateOptions *crypto_opts = NULL;
+    QCryptoBlock *crypto = NULL;

    /* Read out options */
    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
                          BDRV_SECTOR_SIZE);
+    if (total_size == 0) {
+        error_setg(errp, "Image size is too small, cannot be zero length");
+        ret = -EINVAL;
+        goto cleanup;
+    }
+
    backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
-    if (qemu_opt_get_bool_del(opts, BLOCK_OPT_ENCRYPT, false)) {
-        flags |= BLOCK_FLAG_ENCRYPT;
+    encryptfmt = qemu_opt_get_del(opts, BLOCK_OPT_ENCRYPT_FORMAT);
+    if (encryptfmt) {
+        if (qemu_opt_get(opts, BLOCK_OPT_ENCRYPT)) {
+            error_setg(errp, "Options " BLOCK_OPT_ENCRYPT " and "
+                       BLOCK_OPT_ENCRYPT_FORMAT " are mutually exclusive");
+            ret = -EINVAL;
+            goto cleanup;
+        }
+    } else if (qemu_opt_get_bool_del(opts, BLOCK_OPT_ENCRYPT, false)) {
+        encryptfmt = "aes";
    }

    ret = bdrv_create_file(filename, opts, &local_err);
@@ -833,7 +813,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)

    blk_set_allow_write_beyond_eof(qcow_blk, true);

-    ret = blk_truncate(qcow_blk, 0, errp);
+    ret = blk_truncate(qcow_blk, 0, PREALLOC_MODE_OFF, errp);
    if (ret < 0) {
        goto exit;
    }
@@ -867,8 +847,32 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
    l1_size = (total_size + (1LL << shift) - 1) >> shift;

    header.l1_table_offset = cpu_to_be64(header_size);
-    if (flags & BLOCK_FLAG_ENCRYPT) {
+
+    options = qemu_opts_to_qdict(opts, NULL);
+    qdict_extract_subqdict(options, &encryptopts, "encrypt.");
+    QDECREF(options);
+    if (encryptfmt) {
+        if (!g_str_equal(encryptfmt, "aes")) {
+            error_setg(errp, "Unknown encryption format '%s', expected 'aes'",
+                       encryptfmt);
+            ret = -EINVAL;
+            goto exit;
+        }
        header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
+
+        crypto_opts = block_crypto_create_opts_init(
+            Q_CRYPTO_BLOCK_FORMAT_QCOW, encryptopts, errp);
+        if (!crypto_opts) {
+            ret = -EINVAL;
+            goto exit;
+        }
+
+        crypto = qcrypto_block_create(crypto_opts, "encrypt.",
+                                      NULL, NULL, NULL, errp);
+        if (!crypto) {
+            ret = -EINVAL;
+            goto exit;
+        }
    } else {
        header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
    }
@@ -903,6 +907,9 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
 exit:
    blk_unref(qcow_blk);
 cleanup:
+    QDECREF(encryptopts);
+    qcrypto_block_free(crypto);
+    qapi_free_QCryptoBlockCreateOptions(crypto_opts);
    g_free(backing_file);
    return ret;
 }
@@ -917,7 +924,8 @@ static int qcow_make_empty(BlockDriverState *bs)
    if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table,
            l1_length) < 0)
        return -1;
-    ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length, NULL);
+    ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length,
+                        PREALLOC_MODE_OFF, NULL);
    if (ret < 0)
        return ret;

@@ -1041,9 +1049,15 @@ static QemuOptsList qcow_create_opts = {
        {
            .name = BLOCK_OPT_ENCRYPT,
            .type = QEMU_OPT_BOOL,
-            .help = "Encrypt the image",
-            .def_value_str = "off"
+            .help = "Encrypt the image with format 'aes'. (Deprecated "
+                    "in favor of " BLOCK_OPT_ENCRYPT_FORMAT "=aes)",
        },
+        {
+            .name = BLOCK_OPT_ENCRYPT_FORMAT,
+            .type = QEMU_OPT_STRING,
+            .help = "Encrypt the image, format choices: 'aes'",
+        },
+        BLOCK_CRYPTO_OPT_DEF_QCOW_KEY_SECRET("encrypt."),
        { /* end of list */ }
    }
 };
@@ -1064,7 +1078,6 @@ static BlockDriver bdrv_qcow = {
    .bdrv_co_writev         = qcow_co_writev,
    .bdrv_co_get_block_status   = qcow_co_get_block_status,

-    .bdrv_set_key           = qcow_set_key,
    .bdrv_make_empty        = qcow_make_empty,
    .bdrv_co_pwritev_compressed = qcow_co_pwritev_compressed,
    .bdrv_get_info          = qcow_get_info,
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -357,76 +357,21 @@ static int count_contiguous_clusters_unallocated(int nb_clusters,
    return i;
 }

-/* The crypt function is compatible with the linux cryptoloop
-   algorithm for < 4 GB images. NOTE: out_buf == in_buf is
-   supported */
-int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
-                          uint8_t *out_buf, const uint8_t *in_buf,
-                          int nb_sectors, bool enc,
-                          Error **errp)
+static int coroutine_fn do_perform_cow_read(BlockDriverState *bs,
+                                            uint64_t src_cluster_offset,
+                                            unsigned offset_in_cluster,
+                                            QEMUIOVector *qiov)
 {
-    union {
-        uint64_t ll[2];
-        uint8_t b[16];
-    } ivec;
-    int i;
    int ret;

-    for(i = 0; i < nb_sectors; i++) {
-        ivec.ll[0] = cpu_to_le64(sector_num);
-        ivec.ll[1] = 0;
-        if (qcrypto_cipher_setiv(s->cipher,
-                                 ivec.b, G_N_ELEMENTS(ivec.b),
-                                 errp) < 0) {
-            return -1;
-        }
-        if (enc) {
-            ret = qcrypto_cipher_encrypt(s->cipher,
-                                         in_buf,
-                                         out_buf,
-                                         512,
-                                         errp);
-        } else {
-            ret = qcrypto_cipher_decrypt(s->cipher,
-                                         in_buf,
-                                         out_buf,
-                                         512,
-                                         errp);
-        }
-        if (ret < 0) {
-            return -1;
-        }
-        sector_num++;
-        in_buf += 512;
-        out_buf += 512;
+    if (qiov->size == 0) {
+        return 0;
    }
-    return 0;
-}
-
-static int coroutine_fn do_perform_cow(BlockDriverState *bs,
-                                       uint64_t src_cluster_offset,
-                                       uint64_t cluster_offset,
-                                       int offset_in_cluster,
-                                       int bytes)
-{
-    BDRVQcow2State *s = bs->opaque;
-    QEMUIOVector qiov;
-    struct iovec iov;
-    int ret;
-
-    iov.iov_len = bytes;
-    iov.iov_base = qemu_try_blockalign(bs, iov.iov_len);
-    if (iov.iov_base == NULL) {
-        return -ENOMEM;
-    }
-
-    qemu_iovec_init_external(&qiov, &iov, 1);

    BLKDBG_EVENT(bs->file, BLKDBG_COW_READ);

    if (!bs->drv) {
-        ret = -ENOMEDIUM;
-        goto out;
+        return -ENOMEDIUM;
    }

    /* Call .bdrv_co_readv() directly instead of using the public block-layer
@@ -434,43 +379,63 @@ static int coroutine_fn do_perform_cow(BlockDriverState *bs,
     * which can lead to deadlock when block layer copy-on-read is enabled.
     */
    ret = bs->drv->bdrv_co_preadv(bs, src_cluster_offset + offset_in_cluster,
-                                  bytes, &qiov, 0);
+                                  qiov->size, qiov, 0);
    if (ret < 0) {
-        goto out;
+        return ret;
    }

-    if (bs->encrypted) {
-        Error *err = NULL;
-        int64_t sector = (src_cluster_offset + offset_in_cluster)
+    return 0;
+}
+
+static bool coroutine_fn do_perform_cow_encrypt(BlockDriverState *bs,
+                                                uint64_t src_cluster_offset,
+                                                uint64_t cluster_offset,
+                                                unsigned offset_in_cluster,
+                                                uint8_t *buffer,
+                                                unsigned bytes)
+{
+    if (bytes && bs->encrypted) {
+        BDRVQcow2State *s = bs->opaque;
+        int64_t sector = (s->crypt_physical_offset ?
+                          (cluster_offset + offset_in_cluster) :
+                          (src_cluster_offset + offset_in_cluster))
                         >> BDRV_SECTOR_BITS;
-        assert(s->cipher);
        assert((offset_in_cluster & ~BDRV_SECTOR_MASK) == 0);
        assert((bytes & ~BDRV_SECTOR_MASK) == 0);
-        if (qcow2_encrypt_sectors(s, sector, iov.iov_base, iov.iov_base,
-                                  bytes >> BDRV_SECTOR_BITS, true, &err) < 0) {
-            ret = -EIO;
-            error_free(err);
-            goto out;
+        assert(s->crypto);
+        if (qcrypto_block_encrypt(s->crypto, sector, buffer,
+                                  bytes, NULL) < 0) {
+            return false;
        }
    }
+    return true;
+}
+
+static int coroutine_fn do_perform_cow_write(BlockDriverState *bs,
+                                             uint64_t cluster_offset,
+                                             unsigned offset_in_cluster,
+                                             QEMUIOVector *qiov)
+{
+    int ret;
+
+    if (qiov->size == 0) {
+        return 0;
+    }

    ret = qcow2_pre_write_overlap_check(bs, 0,
-            cluster_offset + offset_in_cluster, bytes);
+            cluster_offset + offset_in_cluster, qiov->size);
    if (ret < 0) {
-        goto out;
+        return ret;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
    ret = bdrv_co_pwritev(bs->file, cluster_offset + offset_in_cluster,
-                          bytes, &qiov, 0);
+                          qiov->size, qiov, 0);
    if (ret < 0) {
-        goto out;
+        return ret;
    }

-    ret = 0;
-out:
-    qemu_vfree(iov.iov_base);
-    return ret;
+    return 0;
 }


@@ -548,7 +513,7 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,

    /* find the cluster offset for the given disk offset */

-    l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
+    l2_index = offset_to_l2_index(s, offset);
    *cluster_offset = be64_to_cpu(l2_table[l2_index]);

    nb_clusters = size_to_clusters(s, bytes_needed);
@@ -685,7 +650,7 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,

    /* find the cluster offset for the given disk offset */

-    l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
+    l2_index = offset_to_l2_index(s, offset);

    *new_l2_table = l2_table;
    *new_l2_index = l2_index;
@@ -753,31 +718,134 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
    return cluster_offset;
 }

-static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r)
+static int perform_cow(BlockDriverState *bs, QCowL2Meta *m)
 {
    BDRVQcow2State *s = bs->opaque;
+    Qcow2COWRegion *start = &m->cow_start;
+    Qcow2COWRegion *end = &m->cow_end;
+    unsigned buffer_size;
+    unsigned data_bytes = end->offset - (start->offset + start->nb_bytes);
+    bool merge_reads;
+    uint8_t *start_buffer, *end_buffer;
+    QEMUIOVector qiov;
    int ret;

-    if (r->nb_bytes == 0) {
+    assert(start->nb_bytes <= UINT_MAX - end->nb_bytes);
+    assert(start->nb_bytes + end->nb_bytes <= UINT_MAX - data_bytes);
+    assert(start->offset + start->nb_bytes <= end->offset);
+    assert(!m->data_qiov || m->data_qiov->size == data_bytes);
+
+    if (start->nb_bytes == 0 && end->nb_bytes == 0) {
        return 0;
    }

-    qemu_co_mutex_unlock(&s->lock);
-    ret = do_perform_cow(bs, m->offset, m->alloc_offset, r->offset, r->nb_bytes);
-    qemu_co_mutex_lock(&s->lock);
-
-    if (ret < 0) {
-        return ret;
+    /* If we have to read both the start and end COW regions and the
+     * middle region is not too large then perform just one read
+     * operation */
+    merge_reads = start->nb_bytes && end->nb_bytes && data_bytes <= 16384;
+    if (merge_reads) {
+        buffer_size = start->nb_bytes + data_bytes + end->nb_bytes;
+    } else {
+        /* If we have to do two reads, add some padding in the middle
+         * if necessary to make sure that the end region is optimally
+         * aligned. */
+        size_t align = bdrv_opt_mem_align(bs);
+        assert(align > 0 && align <= UINT_MAX);
+        assert(QEMU_ALIGN_UP(start->nb_bytes, align) <=
+               UINT_MAX - end->nb_bytes);
+        buffer_size = QEMU_ALIGN_UP(start->nb_bytes, align) + end->nb_bytes;
    }

+    /* Reserve a buffer large enough to store all the data that we're
+     * going to read */
+    start_buffer = qemu_try_blockalign(bs, buffer_size);
+    if (start_buffer == NULL) {
+        return -ENOMEM;
+    }
+    /* The part of the buffer where the end region is located */
+    end_buffer = start_buffer + buffer_size - end->nb_bytes;
+
+    qemu_iovec_init(&qiov, 2 + (m->data_qiov ? m->data_qiov->niov : 0));
+
+    qemu_co_mutex_unlock(&s->lock);
+    /* First we read the existing data from both COW regions. We
+     * either read the whole region in one go, or the start and end
+     * regions separately. */
+    if (merge_reads) {
+        qemu_iovec_add(&qiov, start_buffer, buffer_size);
+        ret = do_perform_cow_read(bs, m->offset, start->offset, &qiov);
+    } else {
+        qemu_iovec_add(&qiov, start_buffer, start->nb_bytes);
+        ret = do_perform_cow_read(bs, m->offset, start->offset, &qiov);
+        if (ret < 0) {
+            goto fail;
+        }
+
+        qemu_iovec_reset(&qiov);
+        qemu_iovec_add(&qiov, end_buffer, end->nb_bytes);
+        ret = do_perform_cow_read(bs, m->offset, end->offset, &qiov);
+    }
+    if (ret < 0) {
+        goto fail;
+    }
+
+    /* Encrypt the data if necessary before writing it */
+    if (bs->encrypted) {
+        if (!do_perform_cow_encrypt(bs, m->offset, m->alloc_offset,
+                                    start->offset, start_buffer,
+                                    start->nb_bytes) ||
+            !do_perform_cow_encrypt(bs, m->offset, m->alloc_offset,
+                                    end->offset, end_buffer, end->nb_bytes)) {
+            ret = -EIO;
+            goto fail;
+        }
+    }
+
+    /* And now we can write everything. If we have the guest data we
+     * can write everything in one single operation */
+    if (m->data_qiov) {
+        qemu_iovec_reset(&qiov);
+        if (start->nb_bytes) {
+            qemu_iovec_add(&qiov, start_buffer, start->nb_bytes);
+        }
+        qemu_iovec_concat(&qiov, m->data_qiov, 0, data_bytes);
+        if (end->nb_bytes) {
+            qemu_iovec_add(&qiov, end_buffer, end->nb_bytes);
+        }
+        /* NOTE: we have a write_aio blkdebug event here followed by
+         * a cow_write one in do_perform_cow_write(), but there's only
+         * one single I/O operation */
+        BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
+        ret = do_perform_cow_write(bs, m->alloc_offset, start->offset, &qiov);
+    } else {
+        /* If there's no guest data then write both COW regions separately */
+        qemu_iovec_reset(&qiov);
+        qemu_iovec_add(&qiov, start_buffer, start->nb_bytes);
+        ret = do_perform_cow_write(bs, m->alloc_offset, start->offset, &qiov);
+        if (ret < 0) {
+            goto fail;
+        }
+
+        qemu_iovec_reset(&qiov);
+        qemu_iovec_add(&qiov, end_buffer, end->nb_bytes);
+        ret = do_perform_cow_write(bs, m->alloc_offset, end->offset, &qiov);
+    }
+
+fail:
+    qemu_co_mutex_lock(&s->lock);
+
    /*
     * Before we update the L2 table to actually point to the new cluster, we
     * need to be sure that the refcounts have been increased and COW was
     * handled.
     */
-    qcow2_cache_depends_on_flush(s->l2_table_cache);
+    if (ret == 0) {
+        qcow2_cache_depends_on_flush(s->l2_table_cache);
+    }

-    return 0;
+    qemu_vfree(start_buffer);
+    qemu_iovec_destroy(&qiov);
+    return ret;
 }

 int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
@@ -797,12 +865,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
    }

    /* copy content of unmodified sectors */
-    ret = perform_cow(bs, m, &m->cow_start);
-    if (ret < 0) {
-        goto err;
-    }
-
-    ret = perform_cow(bs, m, &m->cow_end);
+    ret = perform_cow(bs, m);
    if (ret < 0) {
        goto err;
    }
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -281,25 +281,6 @@ int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index,
    return 0;
 }

-/*
- * Rounds the refcount table size up to avoid growing the table for each single
- * refcount block that is allocated.
- */
-static unsigned int next_refcount_table_size(BDRVQcow2State *s,
-    unsigned int min_size)
-{
-    unsigned int min_clusters = (min_size >> (s->cluster_bits - 3)) + 1;
-    unsigned int refcount_table_clusters =
-        MAX(1, s->refcount_table_size >> (s->cluster_bits - 3));
-
-    while (min_clusters > refcount_table_clusters) {
-        refcount_table_clusters = (refcount_table_clusters * 3 + 1) / 2;
-    }
-
-    return refcount_table_clusters << (s->cluster_bits - 3);
-}
-
-
 /* Checks if two offsets are described by the same refcount block */
 static int in_same_refcount_block(BDRVQcow2State *s, uint64_t offset_a,
    uint64_t offset_b)
@@ -321,7 +302,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
 {
    BDRVQcow2State *s = bs->opaque;
    unsigned int refcount_table_index;
-    int ret;
+    int64_t ret;

    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC);

@@ -396,7 +377,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
        ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block,
                                    refcount_block);
        if (ret < 0) {
-            goto fail_block;
+            goto fail;
        }

        memset(*refcount_block, 0, s->cluster_size);
@@ -411,12 +392,12 @@ static int alloc_refcount_block(BlockDriverState *bs,
        ret = update_refcount(bs, new_block, s->cluster_size, 1, false,
                              QCOW2_DISCARD_NEVER);
        if (ret < 0) {
-            goto fail_block;
+            goto fail;
        }

        ret = qcow2_cache_flush(bs, s->refcount_block_cache);
        if (ret < 0) {
-            goto fail_block;
+            goto fail;
        }

        /* Initialize the new refcount block only after updating its refcount,
@@ -424,7 +405,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
        ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block,
                                    refcount_block);
        if (ret < 0) {
-            goto fail_block;
+            goto fail;
        }

        memset(*refcount_block, 0, s->cluster_size);
@@ -435,7 +416,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
    qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache, *refcount_block);
    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
    if (ret < 0) {
-        goto fail_block;
+        goto fail;
    }

    /* If the refcount table is big enough, just hook the block up there */
@@ -446,7 +427,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
            s->refcount_table_offset + refcount_table_index * sizeof(uint64_t),
            &data64, sizeof(data64));
        if (ret < 0) {
-            goto fail_block;
+            goto fail;
        }

        s->refcount_table[refcount_table_index] = new_block;
@@ -490,74 +471,201 @@ static int alloc_refcount_block(BlockDriverState *bs,
                                            (new_block >> s->cluster_bits) + 1),
                                        s->refcount_block_size);

-    if (blocks_used > QCOW_MAX_REFTABLE_SIZE / sizeof(uint64_t)) {
-        return -EFBIG;
-    }
-
-    /* And now we need at least one block more for the new metadata */
-    uint64_t table_size = next_refcount_table_size(s, blocks_used + 1);
-    uint64_t last_table_size;
-    uint64_t blocks_clusters;
-    do {
-        uint64_t table_clusters =
-            size_to_clusters(s, table_size * sizeof(uint64_t));
-        blocks_clusters = 1 +
-            DIV_ROUND_UP(table_clusters, s->refcount_block_size);
-        uint64_t meta_clusters = table_clusters + blocks_clusters;
-
-        last_table_size = table_size;
-        table_size = next_refcount_table_size(s, blocks_used +
-            DIV_ROUND_UP(meta_clusters, s->refcount_block_size));
-
-    } while (last_table_size != table_size);
-
-#ifdef DEBUG_ALLOC2
-    fprintf(stderr, "qcow2: Grow refcount table %" PRId32 " => %" PRId64 "\n",
-        s->refcount_table_size, table_size);
-#endif
-
    /* Create the new refcount table and blocks */
    uint64_t meta_offset = (blocks_used * s->refcount_block_size) *
        s->cluster_size;
-    uint64_t table_offset = meta_offset + blocks_clusters * s->cluster_size;
-    uint64_t *new_table = g_try_new0(uint64_t, table_size);
-    void *new_blocks = g_try_malloc0(blocks_clusters * s->cluster_size);

-    assert(table_size > 0 && blocks_clusters > 0);
-    if (new_table == NULL || new_blocks == NULL) {
+    ret = qcow2_refcount_area(bs, meta_offset, 0, false,
+                              refcount_table_index, new_block);
+    if (ret < 0) {
+        return ret;
+    }
+
+    ret = load_refcount_block(bs, new_block, refcount_block);
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* If we were trying to do the initial refcount update for some cluster
+     * allocation, we might have used the same clusters to store newly
+     * allocated metadata. Make the caller search some new space. */
+    return -EAGAIN;
+
+fail:
+    if (*refcount_block != NULL) {
+        qcow2_cache_put(bs, s->refcount_block_cache, refcount_block);
+    }
+    return ret;
+}
+
+/*
+ * Starting at @start_offset, this function creates new self-covering refcount
+ * structures: A new refcount table and refcount blocks which cover all of
+ * themselves, and a number of @additional_clusters beyond their end.
+ * @start_offset must be at the end of the image file, that is, there must be
+ * only empty space beyond it.
+ * If @exact_size is false, the refcount table will have 50 % more entries than
+ * necessary so it will not need to grow again soon.
+ * If @new_refblock_offset is not zero, it contains the offset of a refcount
+ * block that should be entered into the new refcount table at index
+ * @new_refblock_index.
+ *
+ * Returns: The offset after the new refcount structures (i.e. where the
+ *          @additional_clusters may be placed) on success, -errno on error.
+ */
+int64_t qcow2_refcount_area(BlockDriverState *bs, uint64_t start_offset,
+                            uint64_t additional_clusters, bool exact_size,
+                            int new_refblock_index,
+                            uint64_t new_refblock_offset)
+{
+    BDRVQcow2State *s = bs->opaque;
+    uint64_t total_refblock_count_u64, additional_refblock_count;
+    int total_refblock_count, table_size, area_reftable_index, table_clusters;
+    int i;
+    uint64_t table_offset, block_offset, end_offset;
+    int ret;
+    uint64_t *new_table;
+
+    assert(!(start_offset % s->cluster_size));
+
+    qcow2_refcount_metadata_size(start_offset / s->cluster_size +
+                                 additional_clusters,
+                                 s->cluster_size, s->refcount_order,
+                                 !exact_size, &total_refblock_count_u64);
+    if (total_refblock_count_u64 > QCOW_MAX_REFTABLE_SIZE) {
+        return -EFBIG;
+    }
+    total_refblock_count = total_refblock_count_u64;
+
+    /* Index in the refcount table of the first refcount block to cover the area
+     * of refcount structures we are about to create; we know that
+     * @total_refblock_count can cover @start_offset, so this will definitely
+     * fit into an int. */
+    area_reftable_index = (start_offset / s->cluster_size) /
+                          s->refcount_block_size;
+
+    if (exact_size) {
+        table_size = total_refblock_count;
+    } else {
+        table_size = total_refblock_count +
+                     DIV_ROUND_UP(total_refblock_count, 2);
+    }
+    /* The qcow2 file can only store the reftable size in number of clusters */
+    table_size = ROUND_UP(table_size, s->cluster_size / sizeof(uint64_t));
+    table_clusters = (table_size * sizeof(uint64_t)) / s->cluster_size;
+
+    if (table_size > QCOW_MAX_REFTABLE_SIZE) {
+        return -EFBIG;
+    }
+
+    new_table = g_try_new0(uint64_t, table_size);
+
+    assert(table_size > 0);
+    if (new_table == NULL) {
        ret = -ENOMEM;
-        goto fail_table;
+        goto fail;
    }

    /* Fill the new refcount table */
-    memcpy(new_table, s->refcount_table,
-        s->refcount_table_size * sizeof(uint64_t));
-    new_table[refcount_table_index] = new_block;
-
-    int i;
-    for (i = 0; i < blocks_clusters; i++) {
-        new_table[blocks_used + i] = meta_offset + (i * s->cluster_size);
+    if (table_size > s->max_refcount_table_index) {
+        /* We're actually growing the reftable */
+        memcpy(new_table, s->refcount_table,
+               (s->max_refcount_table_index + 1) * sizeof(uint64_t));
+    } else {
+        /* Improbable case: We're shrinking the reftable. However, the caller
+         * has assured us that there is only empty space beyond @start_offset,
+         * so we can simply drop all of the refblocks that won't fit into the
+         * new reftable. */
+        memcpy(new_table, s->refcount_table, table_size * sizeof(uint64_t));
    }

-    /* Fill the refcount blocks */
-    uint64_t table_clusters = size_to_clusters(s, table_size * sizeof(uint64_t));
-    int block = 0;
-    for (i = 0; i < table_clusters + blocks_clusters; i++) {
-        s->set_refcount(new_blocks, block++, 1);
+    if (new_refblock_offset) {
+        assert(new_refblock_index < total_refblock_count);
+        new_table[new_refblock_index] = new_refblock_offset;
    }

+    /* Count how many new refblocks we have to create */
+    additional_refblock_count = 0;
+    for (i = area_reftable_index; i < total_refblock_count; i++) {
+        if (!new_table[i]) {
+            additional_refblock_count++;
+        }
+    }
+
+    table_offset = start_offset + additional_refblock_count * s->cluster_size;
+    end_offset = table_offset + table_clusters * s->cluster_size;
+
+    /* Fill the refcount blocks, and create new ones, if necessary */
+    block_offset = start_offset;
+    for (i = area_reftable_index; i < total_refblock_count; i++) {
+        void *refblock_data;
+        uint64_t first_offset_covered;
+
+        /* Reuse an existing refblock if possible, create a new one otherwise */
+        if (new_table[i]) {
+            ret = qcow2_cache_get(bs, s->refcount_block_cache, new_table[i],
+                                  &refblock_data);
+            if (ret < 0) {
+                goto fail;
+            }
+        } else {
+            ret = qcow2_cache_get_empty(bs, s->refcount_block_cache,
+                                        block_offset, &refblock_data);
+            if (ret < 0) {
+                goto fail;
+            }
+            memset(refblock_data, 0, s->cluster_size);
+            qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache,
+                                         refblock_data);
+
+            new_table[i] = block_offset;
+            block_offset += s->cluster_size;
+        }
+
+        /* First host offset covered by this refblock */
+        first_offset_covered = (uint64_t)i * s->refcount_block_size *
+                               s->cluster_size;
+        if (first_offset_covered < end_offset) {
+            int j, end_index;
+
+            /* Set the refcount of all of the new refcount structures to 1 */
+
+            if (first_offset_covered < start_offset) {
+                assert(i == area_reftable_index);
+                j = (start_offset - first_offset_covered) / s->cluster_size;
+                assert(j < s->refcount_block_size);
+            } else {
+                j = 0;
+            }
+
+            end_index = MIN((end_offset - first_offset_covered) /
+                            s->cluster_size,
+                            s->refcount_block_size);
+
+            for (; j < end_index; j++) {
+                /* The caller guaranteed us this space would be empty */
+                assert(s->get_refcount(refblock_data, j) == 0);
+                s->set_refcount(refblock_data, j, 1);
+            }
+
+            qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache,
+                                         refblock_data);
+        }
+
+        qcow2_cache_put(bs, s->refcount_block_cache, &refblock_data);
+    }
+
+    assert(block_offset == table_offset);
+
    /* Write refcount blocks to disk */
    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS);
-    ret = bdrv_pwrite_sync(bs->file, meta_offset, new_blocks,
-        blocks_clusters * s->cluster_size);
-    g_free(new_blocks);
-    new_blocks = NULL;
+    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
    if (ret < 0) {
-        goto fail_table;
+        goto fail;
    }

    /* Write refcount table to disk */
-    for(i = 0; i < table_size; i++) {
+    for (i = 0; i < total_refblock_count; i++) {
        cpu_to_be64s(&new_table[i]);
    }

@@ -565,10 +673,10 @@ static int alloc_refcount_block(BlockDriverState *bs,
    ret = bdrv_pwrite_sync(bs->file, table_offset, new_table,
        table_size * sizeof(uint64_t));
    if (ret < 0) {
-        goto fail_table;
+        goto fail;
    }

-    for(i = 0; i < table_size; i++) {
+    for (i = 0; i < total_refblock_count; i++) {
        be64_to_cpus(&new_table[i]);
    }

@@ -584,7 +692,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
                           offsetof(QCowHeader, refcount_table_offset),
                           &data, sizeof(data));
    if (ret < 0) {
-        goto fail_table;
+        goto fail;
    }

    /* And switch it in memory */
@@ -601,23 +709,10 @@ static int alloc_refcount_block(BlockDriverState *bs,
    qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t),
                        QCOW2_DISCARD_OTHER);

-    ret = load_refcount_block(bs, new_block, refcount_block);
-    if (ret < 0) {
-        return ret;
-    }
+    return end_offset;

-    /* If we were trying to do the initial refcount update for some cluster
-     * allocation, we might have used the same clusters to store newly
-     * allocated metadata. Make the caller search some new space. */
-    return -EAGAIN;
-
-fail_table:
-    g_free(new_blocks);
+fail:
    g_free(new_table);
-fail_block:
-    if (*refcount_block != NULL) {
-        qcow2_cache_put(bs, s->refcount_block_cache, refcount_block);
-    }
    return ret;
 }

@@ -1323,11 +1418,10 @@ static int realloc_refcount_array(BDRVQcow2State *s, void **array,
 *
 * Modifies the number of errors in res.
 */
-static int inc_refcounts(BlockDriverState *bs,
-                         BdrvCheckResult *res,
-                         void **refcount_table,
-                         int64_t *refcount_table_size,
-                         int64_t offset, int64_t size)
+int qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res,
+                             void **refcount_table,
+                             int64_t *refcount_table_size,
+                             int64_t offset, int64_t size)
 {
    BDRVQcow2State *s = bs->opaque;
    uint64_t start, last, cluster_offset, k, refcount;
@@ -1420,8 +1514,9 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
            nb_csectors = ((l2_entry >> s->csize_shift) &
                           s->csize_mask) + 1;
            l2_entry &= s->cluster_offset_mask;
-            ret = inc_refcounts(bs, res, refcount_table, refcount_table_size,
-                                l2_entry & ~511, nb_csectors * 512);
+            ret = qcow2_inc_refcounts_imrt(bs, res,
+                                           refcount_table, refcount_table_size,
+                                           l2_entry & ~511, nb_csectors * 512);
            if (ret < 0) {
                goto fail;
            }
@@ -1454,8 +1549,9 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
            }

            /* Mark cluster as used */
-            ret = inc_refcounts(bs, res, refcount_table, refcount_table_size,
-                                offset, s->cluster_size);
+            ret = qcow2_inc_refcounts_imrt(bs, res,
+                                           refcount_table, refcount_table_size,
+                                           offset, s->cluster_size);
            if (ret < 0) {
                goto fail;
            }
@@ -1508,8 +1604,8 @@ static int check_refcounts_l1(BlockDriverState *bs,
    l1_size2 = l1_size * sizeof(uint64_t);

    /* Mark L1 table as used */
-    ret = inc_refcounts(bs, res, refcount_table, refcount_table_size,
-                        l1_table_offset, l1_size2);
+    ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, refcount_table_size,
+                                   l1_table_offset, l1_size2);
    if (ret < 0) {
        goto fail;
    }
@@ -1538,8 +1634,9 @@ static int check_refcounts_l1(BlockDriverState *bs,
        if (l2_offset) {
            /* Mark L2 table as used */
            l2_offset &= L1E_OFFSET_MASK;
-            ret = inc_refcounts(bs, res, refcount_table, refcount_table_size,
-                                l2_offset, s->cluster_size);
+            ret = qcow2_inc_refcounts_imrt(bs, res,
+                                           refcount_table, refcount_table_size,
+                                           l2_offset, s->cluster_size);
            if (ret < 0) {
                goto fail;
            }
@@ -1730,7 +1827,7 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
                }

                ret = bdrv_truncate(bs->file, offset + s->cluster_size,
-                                    &local_err);
+                                    PREALLOC_MODE_OFF, &local_err);
                if (ret < 0) {
                    error_report_err(local_err);
                    goto resize_fail;
@@ -1757,14 +1854,15 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
                }

                res->corruptions_fixed++;
-                ret = inc_refcounts(bs, res, refcount_table, nb_clusters,
-                                    offset, s->cluster_size);
+                ret = qcow2_inc_refcounts_imrt(bs, res,
+                                               refcount_table, nb_clusters,
+                                               offset, s->cluster_size);
                if (ret < 0) {
                    return ret;
                }
                /* No need to check whether the refcount is now greater than 1:
                 * This area was just allocated and zeroed, so it can only be
-                 * exactly 1 after inc_refcounts() */
+                 * exactly 1 after qcow2_inc_refcounts_imrt() */
                continue;

 resize_fail:
@@ -1779,8 +1877,8 @@ resize_fail:
        }

        if (offset != 0) {
-            ret = inc_refcounts(bs, res, refcount_table, nb_clusters,
-                                offset, s->cluster_size);
+            ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters,
+                                           offset, s->cluster_size);
            if (ret < 0) {
                return ret;
            }
@@ -1820,8 +1918,8 @@ static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
    }

    /* header */
-    ret = inc_refcounts(bs, res, refcount_table, nb_clusters,
-                        0, s->cluster_size);
+    ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters,
+                                   0, s->cluster_size);
    if (ret < 0) {
        return ret;
    }
@@ -1842,16 +1940,32 @@ static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
            return ret;
        }
    }
-    ret = inc_refcounts(bs, res, refcount_table, nb_clusters,
-                        s->snapshots_offset, s->snapshots_size);
+    ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters,
+                                   s->snapshots_offset, s->snapshots_size);
    if (ret < 0) {
        return ret;
    }

    /* refcount data */
-    ret = inc_refcounts(bs, res, refcount_table, nb_clusters,
-                        s->refcount_table_offset,
-                        s->refcount_table_size * sizeof(uint64_t));
+    ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters,
+                                   s->refcount_table_offset,
+                                   s->refcount_table_size * sizeof(uint64_t));
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* encryption */
+    if (s->crypto_header.length) {
+        ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, nb_clusters,
+                                       s->crypto_header.offset,
+                                       s->crypto_header.length);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    /* bitmaps */
+    ret = qcow2_check_bitmaps_refcounts(bs, res, refcount_table, nb_clusters);
    if (ret < 0) {
        return ret;
    }
--- a/block/qcow2.c
+++ b/block/qcow2.c
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -25,7 +25,7 @@
 #ifndef BLOCK_QCOW2_H
 #define BLOCK_QCOW2_H

-#include "crypto/cipher.h"
+#include "crypto/block.h"
 #include "qemu/coroutine.h"

 //#define DEBUG_ALLOC
@@ -36,6 +36,7 @@

 #define QCOW_CRYPT_NONE 0
 #define QCOW_CRYPT_AES  1
+#define QCOW_CRYPT_LUKS 2

 #define QCOW_MAX_CRYPT_CLUSTERS 32
 #define QCOW_MAX_SNAPSHOTS 65536
@@ -52,6 +53,10 @@
 * space for snapshot names and IDs */
 #define QCOW_MAX_SNAPSHOTS_SIZE (1024 * QCOW_MAX_SNAPSHOTS)

+/* Bitmap header extension constraints */
+#define QCOW2_MAX_BITMAPS 65535
+#define QCOW2_MAX_BITMAP_DIRECTORY_SIZE (1024 * QCOW2_MAX_BITMAPS)
+
 /* indicate that the refcount of the referenced cluster is exactly one. */
 #define QCOW_OFLAG_COPIED     (1ULL << 63)
 /* indicate that the cluster is compressed (they never have the copied flag) */
@@ -163,6 +168,11 @@ typedef struct QCowSnapshot {
 struct Qcow2Cache;
 typedef struct Qcow2Cache Qcow2Cache;

+typedef struct Qcow2CryptoHeaderExtension {
+    uint64_t offset;
+    uint64_t length;
+} QEMU_PACKED Qcow2CryptoHeaderExtension;
+
 typedef struct Qcow2UnknownHeaderExtension {
    uint32_t magic;
    uint32_t len;
@@ -195,6 +205,14 @@ enum {
    QCOW2_COMPAT_FEAT_MASK            = QCOW2_COMPAT_LAZY_REFCOUNTS,
 };

+/* Autoclear feature bits */
+enum {
+    QCOW2_AUTOCLEAR_BITMAPS_BITNR = 0,
+    QCOW2_AUTOCLEAR_BITMAPS       = 1 << QCOW2_AUTOCLEAR_BITMAPS_BITNR,
+
+    QCOW2_AUTOCLEAR_MASK          = QCOW2_AUTOCLEAR_BITMAPS,
+};
+
 enum qcow2_discard_type {
    QCOW2_DISCARD_NEVER = 0,
    QCOW2_DISCARD_ALWAYS,
@@ -222,6 +240,13 @@ typedef uint64_t Qcow2GetRefcountFunc(const void *refcount_array,
 typedef void Qcow2SetRefcountFunc(void *refcount_array,
                                  uint64_t index, uint64_t value);

+typedef struct Qcow2BitmapHeaderExt {
+    uint32_t nb_bitmaps;
+    uint32_t reserved32;
+    uint64_t bitmap_directory_size;
+    uint64_t bitmap_directory_offset;
+} QEMU_PACKED Qcow2BitmapHeaderExt;
+
 typedef struct BDRVQcow2State {
    int cluster_bits;
    int cluster_size;
@@ -257,13 +282,21 @@ typedef struct BDRVQcow2State {

    CoMutex lock;

-    QCryptoCipher *cipher; /* current cipher, NULL if no key yet */
+    Qcow2CryptoHeaderExtension crypto_header; /* QCow2 header extension */
+    QCryptoBlockOpenOptions *crypto_opts; /* Disk encryption runtime options */
+    QCryptoBlock *crypto; /* Disk encryption format driver */
+    bool crypt_physical_offset; /* Whether to use virtual or physical offset
+                                   for encryption initialization vector tweak */
    uint32_t crypt_method_header;
    uint64_t snapshots_offset;
    int snapshots_size;
    unsigned int nb_snapshots;
    QCowSnapshot *snapshots;

+    uint32_t nb_bitmaps;
+    uint64_t bitmap_directory_size;
+    uint64_t bitmap_directory_offset;
+
    int flags;
    int qcow_version;
    bool use_lazy_refcounts;
@@ -301,10 +334,10 @@ typedef struct Qcow2COWRegion {
     * Offset of the COW region in bytes from the start of the first cluster
     * touched by the request.
     */
-    uint64_t    offset;
+    unsigned    offset;

    /** Number of bytes to copy */
-    int         nb_bytes;
+    unsigned    nb_bytes;
 } Qcow2COWRegion;

 /**
@@ -343,6 +376,13 @@ typedef struct QCowL2Meta
     */
    Qcow2COWRegion cow_end;

+    /**
+     * The I/O vector with the data from the actual guest write request.
+     * If non-NULL, this is meant to be merged together with the data
+     * from @cow_start and @cow_end into one single write operation.
+     */
+    QEMUIOVector *data_qiov;
+
    /** Pointer to next L2Meta of the same write request */
    struct QCowL2Meta *next;

@@ -485,6 +525,10 @@ static inline uint64_t refcount_diff(uint64_t r1, uint64_t r2)
 int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
                  int64_t sector_num, int nb_sectors);

+int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size,
+                                     int refcount_order, bool generous_increase,
+                                     uint64_t *refblock_count);
+
 int qcow2_mark_dirty(BlockDriverState *bs);
 int qcow2_mark_corrupt(BlockDriverState *bs);
 int qcow2_mark_consistent(BlockDriverState *bs);
@@ -505,6 +549,11 @@ int qcow2_update_cluster_refcount(BlockDriverState *bs, int64_t cluster_index,
                                  uint64_t addend, bool decrease,
                                  enum qcow2_discard_type type);

+int64_t qcow2_refcount_area(BlockDriverState *bs, uint64_t offset,
+                            uint64_t additional_clusters, bool exact_size,
+                            int new_refblock_index,
+                            uint64_t new_refblock_offset);
+
 int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size);
 int64_t qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
                                int64_t nb_clusters);
@@ -527,6 +576,10 @@ int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
                                 int64_t size);
 int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset,
                                  int64_t size);
+int qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res,
+                             void **refcount_table,
+                             int64_t *refcount_table_size,
+                             int64_t offset, int64_t size);

 int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
                                BlockDriverAmendStatusCB *status_cb,
@@ -538,8 +591,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
 int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index);
 int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
 int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
-                          uint8_t *out_buf, const uint8_t *in_buf,
-                          int nb_sectors, bool enc, Error **errp);
+                          uint8_t *buf, int nb_sectors, bool enc, Error **errp);

 int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
                             unsigned int *bytes, uint64_t *cluster_offset);
@@ -598,4 +650,20 @@ int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
    void **table);
 void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table);

+/* qcow2-bitmap.c functions */
+int qcow2_check_bitmaps_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
+                                  void **refcount_table,
+                                  int64_t *refcount_table_size);
+bool qcow2_load_autoloading_dirty_bitmaps(BlockDriverState *bs, Error **errp);
+int qcow2_reopen_bitmaps_rw(BlockDriverState *bs, Error **errp);
+void qcow2_store_persistent_dirty_bitmaps(BlockDriverState *bs, Error **errp);
+int qcow2_reopen_bitmaps_ro(BlockDriverState *bs, Error **errp);
+bool qcow2_can_store_new_dirty_bitmap(BlockDriverState *bs,
+                                      const char *name,
+                                      uint32_t granularity,
+                                      Error **errp);
+void qcow2_remove_persistent_dirty_bitmap(BlockDriverState *bs,
+                                          const char *name,
+                                          Error **errp);
+
 #endif
--- a/block/qed-cluster.c
+++ b/block/qed-cluster.c
@@ -61,37 +61,65 @@ static unsigned int qed_count_contiguous_clusters(BDRVQEDState *s,
    return i - index;
 }

-typedef struct {
-    BDRVQEDState *s;
-    uint64_t pos;
-    size_t len;
-
-    QEDRequest *request;
-
-    /* User callback */
-    QEDFindClusterFunc *cb;
-    void *opaque;
-} QEDFindClusterCB;
-
-static void qed_find_cluster_cb(void *opaque, int ret)
+/**
+ * Find the offset of a data cluster
+ *
+ * @s:          QED state
+ * @request:    L2 cache entry
+ * @pos:        Byte position in device
+ * @len:        Number of bytes (may be shortened on return)
+ * @img_offset: Contains offset in the image file on success
+ *
+ * This function translates a position in the block device to an offset in the
+ * image file. The translated offset or unallocated range in the image file is
+ * reported back in *img_offset and *len.
+ *
+ * If the L2 table exists, request->l2_table points to the L2 table cache entry
+ * and the caller must free the reference when they are finished.  The cache
+ * entry is exposed in this way to avoid callers having to read the L2 table
+ * again later during request processing.  If request->l2_table is non-NULL it
+ * will be unreferenced before taking on the new cache entry.
+ *
+ * On success QED_CLUSTER_FOUND is returned and img_offset/len are a contiguous
+ * range in the image file.
+ *
+ * On failure QED_CLUSTER_L2 or QED_CLUSTER_L1 is returned for missing L2 or L1
+ * table offset, respectively. len is number of contiguous unallocated bytes.
+ *
+ * Called with table_lock held.
+ */
+int coroutine_fn qed_find_cluster(BDRVQEDState *s, QEDRequest *request,
+                                  uint64_t pos, size_t *len,
+                                  uint64_t *img_offset)
 {
-    QEDFindClusterCB *find_cluster_cb = opaque;
-    BDRVQEDState *s = find_cluster_cb->s;
-    QEDRequest *request = find_cluster_cb->request;
+    uint64_t l2_offset;
    uint64_t offset = 0;
-    size_t len = 0;
    unsigned int index;
    unsigned int n;
+    int ret;

-    qed_acquire(s);
+    /* Limit length to L2 boundary.  Requests are broken up at the L2 boundary
+     * so that a request acts on one L2 table at a time.
+     */
+    *len = MIN(*len, (((pos >> s->l1_shift) + 1) << s->l1_shift) - pos);
+
+    l2_offset = s->l1_table->offsets[qed_l1_index(s, pos)];
+    if (qed_offset_is_unalloc_cluster(l2_offset)) {
+        *img_offset = 0;
+        return QED_CLUSTER_L1;
+    }
+    if (!qed_check_table_offset(s, l2_offset)) {
+        *img_offset = *len = 0;
+        return -EINVAL;
+    }
+
+    ret = qed_read_l2_table(s, request, l2_offset);
    if (ret) {
        goto out;
    }

-    index = qed_l2_index(s, find_cluster_cb->pos);
-    n = qed_bytes_to_clusters(s,
-                              qed_offset_into_cluster(s, find_cluster_cb->pos) +
-                              find_cluster_cb->len);
+    index = qed_l2_index(s, pos);
+    n = qed_bytes_to_clusters(s, qed_offset_into_cluster(s, pos) + *len);
    n = qed_count_contiguous_clusters(s, request->l2_table->table,
                                      index, n, &offset);

@@ -105,64 +133,10 @@ static void qed_find_cluster_cb(void *opaque, int ret)
        ret = -EINVAL;
    }

-    len = MIN(find_cluster_cb->len, n * s->header.cluster_size -
-              qed_offset_into_cluster(s, find_cluster_cb->pos));
+    *len = MIN(*len,
+               n * s->header.cluster_size - qed_offset_into_cluster(s, pos));

 out:
-    find_cluster_cb->cb(find_cluster_cb->opaque, ret, offset, len);
-    qed_release(s);
-    g_free(find_cluster_cb);
-}
-
-/**
- * Find the offset of a data cluster
- *
- * @s:          QED state
- * @request:    L2 cache entry
- * @pos:        Byte position in device
- * @len:        Number of bytes
- * @cb:         Completion function
- * @opaque:     User data for completion function
- *
- * This function translates a position in the block device to an offset in the
- * image file.  It invokes the cb completion callback to report back the
- * translated offset or unallocated range in the image file.
- *
- * If the L2 table exists, request->l2_table points to the L2 table cache entry
- * and the caller must free the reference when they are finished.  The cache
- * entry is exposed in this way to avoid callers having to read the L2 table
- * again later during request processing.  If request->l2_table is non-NULL it
- * will be unreferenced before taking on the new cache entry.
- */
-void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos,
-                      size_t len, QEDFindClusterFunc *cb, void *opaque)
-{
-    QEDFindClusterCB *find_cluster_cb;
-    uint64_t l2_offset;
-
-    /* Limit length to L2 boundary.  Requests are broken up at the L2 boundary
-     * so that a request acts on one L2 table at a time.
-     */
-    len = MIN(len, (((pos >> s->l1_shift) + 1) << s->l1_shift) - pos);
-
-    l2_offset = s->l1_table->offsets[qed_l1_index(s, pos)];
-    if (qed_offset_is_unalloc_cluster(l2_offset)) {
-        cb(opaque, QED_CLUSTER_L1, 0, len);
-        return;
-    }
-    if (!qed_check_table_offset(s, l2_offset)) {
-        cb(opaque, -EINVAL, 0, 0);
-        return;
-    }
-
-    find_cluster_cb = g_malloc(sizeof(*find_cluster_cb));
-    find_cluster_cb->s = s;
-    find_cluster_cb->pos = pos;
-    find_cluster_cb->len = len;
-    find_cluster_cb->cb = cb;
-    find_cluster_cb->opaque = opaque;
-    find_cluster_cb->request = request;
-
-    qed_read_l2_table(s, request, l2_offset,
-                      qed_find_cluster_cb, find_cluster_cb);
+    *img_offset = offset;
+    return ret;
 }
--- a/block/qed-gencb.c
+++ b/block/qed-gencb.c
@@ -1,33 +0,0 @@
-/*
- * QEMU Enhanced Disk Format
- *
- * Copyright IBM, Corp. 2010
- *
- * Authors:
- *  Stefan Hajnoczi   <stefanha@linux.vnet.ibm.com>
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- *
- */
-
-#include "qemu/osdep.h"
-#include "qed.h"
-
-void *gencb_alloc(size_t len, BlockCompletionFunc *cb, void *opaque)
-{
-    GenericCB *gencb = g_malloc(len);
-    gencb->cb = cb;
-    gencb->opaque = opaque;
-    return gencb;
-}
-
-void gencb_complete(void *opaque, int ret)
-{
-    GenericCB *gencb = opaque;
-    BlockCompletionFunc *cb = gencb->cb;
-    void *user_opaque = gencb->opaque;
-
-    g_free(gencb);
-    cb(user_opaque, ret);
-}
--- a/block/qed-l2-cache.c
+++ b/block/qed-l2-cache.c
@@ -101,6 +101,8 @@ CachedL2Table *qed_alloc_l2_cache_entry(L2TableCache *l2_cache)
 /**
 * Decrease an entry's reference count and free if necessary when the reference
 * count drops to zero.
+ *
+ * Called with table_lock held.
 */
 void qed_unref_l2_cache_entry(CachedL2Table *entry)
 {
@@ -122,6 +124,8 @@ void qed_unref_l2_cache_entry(CachedL2Table *entry)
 *
 * For a cached entry, this function increases the reference count and returns
 * the entry.
+ *
+ * Called with table_lock held.
 */
 CachedL2Table *qed_find_l2_cache_entry(L2TableCache *l2_cache, uint64_t offset)
 {
@@ -150,6 +154,8 @@ CachedL2Table *qed_find_l2_cache_entry(L2TableCache *l2_cache, uint64_t offset)
 * N.B. This function steals a reference to the l2_table from the caller so the
 * caller must obtain a new reference by issuing a call to
 * qed_find_l2_cache_entry().
+ *
+ * Called with table_lock held.
 */
 void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table)
 {
--- a/block/qed-table.c
+++ b/block/qed-table.c
@@ -18,99 +18,43 @@
 #include "qed.h"
 #include "qemu/bswap.h"

-typedef struct {
-    GenericCB gencb;
-    BDRVQEDState *s;
-    QEDTable *table;
-
-    struct iovec iov;
-    QEMUIOVector qiov;
-} QEDReadTableCB;
-
-static void qed_read_table_cb(void *opaque, int ret)
+/* Called either from qed_check or with table_lock held.  */
+static int qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table)
 {
-    QEDReadTableCB *read_table_cb = opaque;
-    QEDTable *table = read_table_cb->table;
-    BDRVQEDState *s = read_table_cb->s;
-    int noffsets = read_table_cb->qiov.size / sizeof(uint64_t);
-    int i;
+    QEMUIOVector qiov;
+    int noffsets;
+    int i, ret;

-    /* Handle I/O error */
-    if (ret) {
+    struct iovec iov = {
+        .iov_base = table->offsets,
+        .iov_len = s->header.cluster_size * s->header.table_size,
+    };
+    qemu_iovec_init_external(&qiov, &iov, 1);
+
+    trace_qed_read_table(s, offset, table);
+
+    if (qemu_in_coroutine()) {
+        qemu_co_mutex_unlock(&s->table_lock);
+    }
+    ret = bdrv_preadv(s->bs->file, offset, &qiov);
+    if (qemu_in_coroutine()) {
+        qemu_co_mutex_lock(&s->table_lock);
+    }
+    if (ret < 0) {
        goto out;
    }

    /* Byteswap offsets */
-    qed_acquire(s);
+    noffsets = qiov.size / sizeof(uint64_t);
    for (i = 0; i < noffsets; i++) {
        table->offsets[i] = le64_to_cpu(table->offsets[i]);
    }
-    qed_release(s);

+    ret = 0;
 out:
    /* Completion */
-    trace_qed_read_table_cb(s, read_table_cb->table, ret);
-    gencb_complete(&read_table_cb->gencb, ret);
-}
-
-static void qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
-                           BlockCompletionFunc *cb, void *opaque)
-{
-    QEDReadTableCB *read_table_cb = gencb_alloc(sizeof(*read_table_cb),
-                                                cb, opaque);
-    QEMUIOVector *qiov = &read_table_cb->qiov;
-
-    trace_qed_read_table(s, offset, table);
-
-    read_table_cb->s = s;
-    read_table_cb->table = table;
-    read_table_cb->iov.iov_base = table->offsets,
-    read_table_cb->iov.iov_len = s->header.cluster_size * s->header.table_size,
-
-    qemu_iovec_init_external(qiov, &read_table_cb->iov, 1);
-    bdrv_aio_readv(s->bs->file, offset / BDRV_SECTOR_SIZE, qiov,
-                   qiov->size / BDRV_SECTOR_SIZE,
-                   qed_read_table_cb, read_table_cb);
-}
-
-typedef struct {
-    GenericCB gencb;
-    BDRVQEDState *s;
-    QEDTable *orig_table;
-    QEDTable *table;
-    bool flush;             /* flush after write? */
-
-    struct iovec iov;
-    QEMUIOVector qiov;
-} QEDWriteTableCB;
-
-static void qed_write_table_cb(void *opaque, int ret)
-{
-    QEDWriteTableCB *write_table_cb = opaque;
-    BDRVQEDState *s = write_table_cb->s;
-
-    trace_qed_write_table_cb(s,
-                             write_table_cb->orig_table,
-                             write_table_cb->flush,
-                             ret);
-
-    if (ret) {
-        goto out;
-    }
-
-    if (write_table_cb->flush) {
-        /* We still need to flush first */
-        write_table_cb->flush = false;
-        qed_acquire(s);
-        bdrv_aio_flush(write_table_cb->s->bs, qed_write_table_cb,
-                       write_table_cb);
-        qed_release(s);
-        return;
-    }
-
-out:
-    qemu_vfree(write_table_cb->table);
-    gencb_complete(&write_table_cb->gencb, ret);
+    trace_qed_read_table_cb(s, table, ret);
+    return ret;
 }

 /**
@@ -122,17 +66,19 @@ out:
 * @index:      Index of first element
 * @n:          Number of elements
 * @flush:      Whether or not to sync to disk
- * @cb:         Completion function
- * @opaque:     Argument for completion function
+ *
+ * Called either from qed_check or with table_lock held.
 */
-static void qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
-                            unsigned int index, unsigned int n, bool flush,
-                            BlockCompletionFunc *cb, void *opaque)
+static int qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
+                           unsigned int index, unsigned int n, bool flush)
 {
-    QEDWriteTableCB *write_table_cb;
    unsigned int sector_mask = BDRV_SECTOR_SIZE / sizeof(uint64_t) - 1;
    unsigned int start, end, i;
+    QEDTable *new_table;
+    struct iovec iov;
+    QEMUIOVector qiov;
    size_t len_bytes;
+    int ret;

    trace_qed_write_table(s, offset, table, index, n);

@@ -142,157 +88,120 @@ static void qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,

    len_bytes = (end - start) * sizeof(uint64_t);

-    write_table_cb = gencb_alloc(sizeof(*write_table_cb), cb, opaque);
-    write_table_cb->s = s;
-    write_table_cb->orig_table = table;
-    write_table_cb->flush = flush;
-    write_table_cb->table = qemu_blockalign(s->bs, len_bytes);
-    write_table_cb->iov.iov_base = write_table_cb->table->offsets;
-    write_table_cb->iov.iov_len = len_bytes;
-    qemu_iovec_init_external(&write_table_cb->qiov, &write_table_cb->iov, 1);
+    new_table = qemu_blockalign(s->bs, len_bytes);
+    iov = (struct iovec) {
+        .iov_base = new_table->offsets,
+        .iov_len = len_bytes,
+    };
+    qemu_iovec_init_external(&qiov, &iov, 1);

    /* Byteswap table */
    for (i = start; i < end; i++) {
        uint64_t le_offset = cpu_to_le64(table->offsets[i]);
-        write_table_cb->table->offsets[i - start] = le_offset;
+        new_table->offsets[i - start] = le_offset;
    }

    /* Adjust for offset into table */
    offset += start * sizeof(uint64_t);

-    bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE,
-                    &write_table_cb->qiov,
-                    write_table_cb->qiov.size / BDRV_SECTOR_SIZE,
-                    qed_write_table_cb, write_table_cb);
-}
+    if (qemu_in_coroutine()) {
+        qemu_co_mutex_unlock(&s->table_lock);
+    }
+    ret = bdrv_pwritev(s->bs->file, offset, &qiov);
+    if (qemu_in_coroutine()) {
+        qemu_co_mutex_lock(&s->table_lock);
+    }
+    trace_qed_write_table_cb(s, table, flush, ret);
+    if (ret < 0) {
+        goto out;
+    }

-/**
- * Propagate return value from async callback
- */
-static void qed_sync_cb(void *opaque, int ret)
-{
-    *(int *)opaque = ret;
+    if (flush) {
+        ret = bdrv_flush(s->bs);
+        if (ret < 0) {
+            goto out;
+        }
+    }
+
+    ret = 0;
+out:
+    qemu_vfree(new_table);
+    return ret;
 }

 int qed_read_l1_table_sync(BDRVQEDState *s)
 {
-    int ret = -EINPROGRESS;
-
-    qed_read_table(s, s->header.l1_table_offset,
-                   s->l1_table, qed_sync_cb, &ret);
-    BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS);
-
-    return ret;
+    return qed_read_table(s, s->header.l1_table_offset, s->l1_table);
 }

-void qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n,
-                        BlockCompletionFunc *cb, void *opaque)
+/* Called either from qed_check or with table_lock held.  */
+int qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n)
 {
    BLKDBG_EVENT(s->bs->file, BLKDBG_L1_UPDATE);
-    qed_write_table(s, s->header.l1_table_offset,
-                    s->l1_table, index, n, false, cb, opaque);
+    return qed_write_table(s, s->header.l1_table_offset,
+                           s->l1_table, index, n, false);
 }

 int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
                            unsigned int n)
 {
-    int ret = -EINPROGRESS;
-
-    qed_write_l1_table(s, index, n, qed_sync_cb, &ret);
-    BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS);
-
-    return ret;
+    return qed_write_l1_table(s, index, n);
 }

-typedef struct {
-    GenericCB gencb;
-    BDRVQEDState *s;
-    uint64_t l2_offset;
-    QEDRequest *request;
-} QEDReadL2TableCB;
-
-static void qed_read_l2_table_cb(void *opaque, int ret)
+/* Called either from qed_check or with table_lock held.  */
+int qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset)
 {
-    QEDReadL2TableCB *read_l2_table_cb = opaque;
-    QEDRequest *request = read_l2_table_cb->request;
-    BDRVQEDState *s = read_l2_table_cb->s;
-    CachedL2Table *l2_table = request->l2_table;
-    uint64_t l2_offset = read_l2_table_cb->l2_offset;
-
-    qed_acquire(s);
-    if (ret) {
-        /* can't trust loaded L2 table anymore */
-        qed_unref_l2_cache_entry(l2_table);
-        request->l2_table = NULL;
-    } else {
-        l2_table->offset = l2_offset;
-
-        qed_commit_l2_cache_entry(&s->l2_cache, l2_table);
-
-        /* This is guaranteed to succeed because we just committed the entry
-         * to the cache.
-         */
-        request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset);
-        assert(request->l2_table != NULL);
-    }
-    qed_release(s);
-
-    gencb_complete(&read_l2_table_cb->gencb, ret);
-}
-
-void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset,
-                       BlockCompletionFunc *cb, void *opaque)
-{
-    QEDReadL2TableCB *read_l2_table_cb;
+    int ret;

    qed_unref_l2_cache_entry(request->l2_table);

    /* Check for cached L2 entry */
    request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, offset);
    if (request->l2_table) {
-        cb(opaque, 0);
-        return;
+        return 0;
    }

    request->l2_table = qed_alloc_l2_cache_entry(&s->l2_cache);
    request->l2_table->table = qed_alloc_table(s);

-    read_l2_table_cb = gencb_alloc(sizeof(*read_l2_table_cb), cb, opaque);
-    read_l2_table_cb->s = s;
-    read_l2_table_cb->l2_offset = offset;
-    read_l2_table_cb->request = request;
-
    BLKDBG_EVENT(s->bs->file, BLKDBG_L2_LOAD);
-    qed_read_table(s, offset, request->l2_table->table,
-                   qed_read_l2_table_cb, read_l2_table_cb);
-}
+    ret = qed_read_table(s, offset, request->l2_table->table);

-int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset)
-{
-    int ret = -EINPROGRESS;
+    if (ret) {
+        /* can't trust loaded L2 table anymore */
+        qed_unref_l2_cache_entry(request->l2_table);
+        request->l2_table = NULL;
+    } else {
+        request->l2_table->offset = offset;

-    qed_read_l2_table(s, request, offset, qed_sync_cb, &ret);
-    BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS);
+        qed_commit_l2_cache_entry(&s->l2_cache, request->l2_table);
+
+        /* This is guaranteed to succeed because we just committed the entry
+         * to the cache.
+         */
+        request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, offset);
+        assert(request->l2_table != NULL);
+    }

    return ret;
 }

-void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
-                        unsigned int index, unsigned int n, bool flush,
-                        BlockCompletionFunc *cb, void *opaque)
+int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset)
+{
+    return qed_read_l2_table(s, request, offset);
+}
+
+/* Called either from qed_check or with table_lock held.  */
+int qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
+                       unsigned int index, unsigned int n, bool flush)
 {
    BLKDBG_EVENT(s->bs->file, BLKDBG_L2_UPDATE);
-    qed_write_table(s, request->l2_table->offset,
-                    request->l2_table->table, index, n, flush, cb, opaque);
+    return qed_write_table(s, request->l2_table->offset,
+                           request->l2_table->table, index, n, flush);
 }

 int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
                            unsigned int index, unsigned int n, bool flush)
 {
-    int ret = -EINPROGRESS;
-
-    qed_write_l2_table(s, request, index, n, flush, qed_sync_cb, &ret);
-    BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS);
-
-    return ret;
+    return qed_write_l2_table(s, request, index, n, flush);
 }
--- a/block/qed.c
+++ b/block/qed.c
--- a/block/qed.h
+++ b/block/qed.h
@@ -129,8 +129,7 @@ enum {
 };

 typedef struct QEDAIOCB {
-    BlockAIOCB common;
-    int bh_ret;                     /* final return status for completion bh */
+    BlockDriverState *bs;
    QSIMPLEQ_ENTRY(QEDAIOCB) next;  /* next request */
    int flags;                      /* QED_AIOCB_* bits ORed together */
    uint64_t end_pos;               /* request end on block device, in bytes */
@@ -152,18 +151,25 @@ typedef struct QEDAIOCB {

 typedef struct {
    BlockDriverState *bs;           /* device */
-    uint64_t file_size;             /* length of image file, in bytes */

+    /* Written only by an allocating write or the timer handler (the latter
+     * while allocating reqs are plugged).
+     */
    QEDHeader header;               /* always cpu-endian */
+
+    /* Protected by table_lock.  */
+    CoMutex table_lock;
    QEDTable *l1_table;
    L2TableCache l2_cache;          /* l2 table cache */
    uint32_t table_nelems;
    uint32_t l1_shift;
    uint32_t l2_shift;
    uint32_t l2_mask;
+    uint64_t file_size;             /* length of image file, in bytes */

    /* Allocating write request queue */
-    QSIMPLEQ_HEAD(, QEDAIOCB) allocating_write_reqs;
+    QEDAIOCB *allocating_acb;
+    CoQueue allocating_write_reqs;
    bool allocating_write_reqs_plugged;

    /* Periodic flush and clear need check flag */
@@ -177,41 +183,6 @@ enum {
    QED_CLUSTER_L1,            /* cluster missing in L1 */
 };

-/**
- * qed_find_cluster() completion callback
- *
- * @opaque:     User data for completion callback
- * @ret:        QED_CLUSTER_FOUND   Success
- *              QED_CLUSTER_L2      Data cluster unallocated in L2
- *              QED_CLUSTER_L1      L2 unallocated in L1
- *              -errno              POSIX error occurred
- * @offset:     Data cluster offset
- * @len:        Contiguous bytes starting from cluster offset
- *
- * This function is invoked when qed_find_cluster() completes.
- *
- * On success ret is QED_CLUSTER_FOUND and offset/len are a contiguous range
- * in the image file.
- *
- * On failure ret is QED_CLUSTER_L2 or QED_CLUSTER_L1 for missing L2 or L1
- * table offset, respectively.  len is number of contiguous unallocated bytes.
- */
-typedef void QEDFindClusterFunc(void *opaque, int ret, uint64_t offset, size_t len);
-
-void qed_acquire(BDRVQEDState *s);
-void qed_release(BDRVQEDState *s);
-
-/**
- * Generic callback for chaining async callbacks
- */
-typedef struct {
-    BlockCompletionFunc *cb;
-    void *opaque;
-} GenericCB;
-
-void *gencb_alloc(size_t len, BlockCompletionFunc *cb, void *opaque);
-void gencb_complete(void *opaque, int ret);
-
 /**
 * Header functions
 */
@@ -231,25 +202,23 @@ void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table);
 * Table I/O functions
 */
 int qed_read_l1_table_sync(BDRVQEDState *s);
-void qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n,
-                        BlockCompletionFunc *cb, void *opaque);
+int qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n);
 int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
                            unsigned int n);
 int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
                           uint64_t offset);
-void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset,
-                       BlockCompletionFunc *cb, void *opaque);
-void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
-                        unsigned int index, unsigned int n, bool flush,
-                        BlockCompletionFunc *cb, void *opaque);
+int qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset);
+int qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
+                       unsigned int index, unsigned int n, bool flush);
 int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
                            unsigned int index, unsigned int n, bool flush);

 /**
 * Cluster functions
 */
-void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos,
-                      size_t len, QEDFindClusterFunc *cb, void *opaque);
+int coroutine_fn qed_find_cluster(BDRVQEDState *s, QEDRequest *request,
+                                  uint64_t pos, size_t *len,
+                                  uint64_t *img_offset);

 /**
 * Consistency check
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -259,12 +259,12 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
    *pnum = nb_sectors;
    *file = bs->file->bs;
    sector_num += s->offset / BDRV_SECTOR_SIZE;
-    return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
+    return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID |
           (sector_num << BDRV_SECTOR_BITS);
 }

 static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs,
-                                             int64_t offset, int count,
+                                             int64_t offset, int bytes,
                                             BdrvRequestFlags flags)
 {
    BDRVRawState *s = bs->opaque;
@@ -272,18 +272,18 @@ static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs,
        return -EINVAL;
    }
    offset += s->offset;
-    return bdrv_co_pwrite_zeroes(bs->file, offset, count, flags);
+    return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
 }

 static int coroutine_fn raw_co_pdiscard(BlockDriverState *bs,
-                                        int64_t offset, int count)
+                                        int64_t offset, int bytes)
 {
    BDRVRawState *s = bs->opaque;
    if (offset > UINT64_MAX - s->offset) {
        return -EINVAL;
    }
    offset += s->offset;
-    return bdrv_co_pdiscard(bs->file->bs, offset, count);
+    return bdrv_co_pdiscard(bs->file->bs, offset, bytes);
 }

 static int64_t raw_getlength(BlockDriverState *bs)
@@ -312,6 +312,31 @@ static int64_t raw_getlength(BlockDriverState *bs)
    return s->size;
 }

+static BlockMeasureInfo *raw_measure(QemuOpts *opts, BlockDriverState *in_bs,
+                                     Error **errp)
+{
+    BlockMeasureInfo *info;
+    int64_t required;
+
+    if (in_bs) {
+        required = bdrv_getlength(in_bs);
+        if (required < 0) {
+            error_setg_errno(errp, -required, "Unable to get image size");
+            return NULL;
+        }
+    } else {
+        required = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
+                            BDRV_SECTOR_SIZE);
+    }
+
+    info = g_new(BlockMeasureInfo, 1);
+    info->required = required;
+
+    /* Unallocated sectors count towards the file size in raw images */
+    info->fully_allocated = info->required;
+    return info;
+}
+
 static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 {
    return bdrv_get_info(bs->file->bs, bdi);
@@ -327,7 +352,8 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
    }
 }

-static int raw_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
+static int raw_truncate(BlockDriverState *bs, int64_t offset,
+                        PreallocMode prealloc, Error **errp)
 {
    BDRVRawState *s = bs->opaque;

@@ -343,7 +369,7 @@ static int raw_truncate(BlockDriverState *bs, int64_t offset, Error **errp)

    s->size = offset;
    offset += s->offset;
-    return bdrv_truncate(bs->file, offset, errp);
+    return bdrv_truncate(bs->file, offset, prealloc, errp);
 }

 static int raw_media_changed(BlockDriverState *bs)
@@ -479,6 +505,7 @@ BlockDriver bdrv_raw = {
    .bdrv_truncate        = &raw_truncate,
    .bdrv_getlength       = &raw_getlength,
    .has_variable_length  = true,
+    .bdrv_measure         = &raw_measure,
    .bdrv_get_info        = &raw_get_info,
    .bdrv_refresh_limits  = &raw_refresh_limits,
    .bdrv_probe_blocksizes = &raw_probe_blocksizes,
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -555,9 +555,9 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
     * filename encoded options */
    filename = qdict_get_try_str(options, "filename");
    if (filename) {
-        error_report("Warning: 'filename' option specified. "
-                      "This is an unsupported option, and may be deprecated "
-                      "in the future");
+        warn_report("'filename' option specified. "
+                    "This is an unsupported option, and may be deprecated "
+                    "in the future");
        qemu_rbd_parse_filename(filename, options, &local_err);
        if (local_err) {
            r = -EINVAL;
@@ -936,11 +936,18 @@ static int64_t qemu_rbd_getlength(BlockDriverState *bs)
    return info.size;
 }

-static int qemu_rbd_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
+static int qemu_rbd_truncate(BlockDriverState *bs, int64_t offset,
+                             PreallocMode prealloc, Error **errp)
 {
    BDRVRBDState *s = bs->opaque;
    int r;

+    if (prealloc != PREALLOC_MODE_OFF) {
+        error_setg(errp, "Unsupported preallocation mode '%s'",
+                   PreallocMode_lookup[prealloc]);
+        return -ENOTSUP;
+    }
+
    r = rbd_resize(s->image, offset);
    if (r < 0) {
        error_setg_errno(errp, -r, "Failed to resize file");
@@ -1065,11 +1072,11 @@ static int qemu_rbd_snap_list(BlockDriverState *bs,
 #ifdef LIBRBD_SUPPORTS_DISCARD
 static BlockAIOCB *qemu_rbd_aio_pdiscard(BlockDriverState *bs,
                                         int64_t offset,
-                                         int count,
+                                         int bytes,
                                         BlockCompletionFunc *cb,
                                         void *opaque)
 {
-    return rbd_start_aio(bs, offset, NULL, count, cb, opaque,
+    return rbd_start_aio(bs, offset, NULL, bytes, cb, opaque,
                         RBD_AIO_DISCARD);
 }
 #endif
--- a/block/replication.c
+++ b/block/replication.c
@@ -234,10 +234,14 @@ static coroutine_fn int replication_co_readv(BlockDriverState *bs,
    }

    if (job) {
-        backup_wait_for_overlapping_requests(child->bs->job, sector_num,
-                                             remaining_sectors);
-        backup_cow_request_begin(&req, child->bs->job, sector_num,
-                                 remaining_sectors);
+        uint64_t remaining_bytes = remaining_sectors * BDRV_SECTOR_SIZE;
+
+        backup_wait_for_overlapping_requests(child->bs->job,
+                                             sector_num * BDRV_SECTOR_SIZE,
+                                             remaining_bytes);
+        backup_cow_request_begin(&req, child->bs->job,
+                                 sector_num * BDRV_SECTOR_SIZE,
+                                 remaining_bytes);
        ret = bdrv_co_readv(bs->file, sector_num, remaining_sectors,
                            qiov);
        backup_cow_request_end(&req);
@@ -260,7 +264,8 @@ static coroutine_fn int replication_co_writev(BlockDriverState *bs,
    BdrvChild *top = bs->file;
    BdrvChild *base = s->secondary_disk;
    BdrvChild *target;
-    int ret, n;
+    int ret;
+    int64_t n;

    ret = replication_get_io_status(s);
    if (ret < 0) {
@@ -279,14 +284,20 @@ static coroutine_fn int replication_co_writev(BlockDriverState *bs,
     */
    qemu_iovec_init(&hd_qiov, qiov->niov);
    while (remaining_sectors > 0) {
-        ret = bdrv_is_allocated_above(top->bs, base->bs, sector_num,
-                                      remaining_sectors, &n);
+        int64_t count;
+
+        ret = bdrv_is_allocated_above(top->bs, base->bs,
+                                      sector_num * BDRV_SECTOR_SIZE,
+                                      remaining_sectors * BDRV_SECTOR_SIZE,
+                                      &count);
        if (ret < 0) {
            goto out1;
        }

+        assert(QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE));
+        n = count >> BDRV_SECTOR_BITS;
        qemu_iovec_reset(&hd_qiov);
-        qemu_iovec_concat(&hd_qiov, qiov, bytes_done, n * BDRV_SECTOR_SIZE);
+        qemu_iovec_concat(&hd_qiov, qiov, bytes_done, count);

        target = ret ? top : base;
        ret = bdrv_co_writev(target, sector_num, n, &hd_qiov);
@@ -296,7 +307,7 @@ static coroutine_fn int replication_co_writev(BlockDriverState *bs,

        remaining_sectors -= n;
        sector_num += n;
-        bytes_done += n * BDRV_SECTOR_SIZE;
+        bytes_done += count;
    }

 out1:
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -390,6 +390,7 @@ struct BDRVSheepdogState {
    QLIST_HEAD(inflight_aio_head, AIOReq) inflight_aio_head;
    QLIST_HEAD(failed_aio_head, AIOReq) failed_aio_head;

+    CoMutex queue_lock;
    CoQueue overlapping_queue;
    QLIST_HEAD(inflight_aiocb_head, SheepdogAIOCB) inflight_aiocb_head;
 };
@@ -488,7 +489,7 @@ static void wait_for_overlapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *acb)
 retry:
    QLIST_FOREACH(cb, &s->inflight_aiocb_head, aiocb_siblings) {
        if (AIOCBOverlapping(acb, cb)) {
-            qemu_co_queue_wait(&s->overlapping_queue, NULL);
+            qemu_co_queue_wait(&s->overlapping_queue, &s->queue_lock);
            goto retry;
        }
    }
@@ -525,8 +526,10 @@ static void sd_aio_setup(SheepdogAIOCB *acb, BDRVSheepdogState *s,
        return;
    }

+    qemu_co_mutex_lock(&s->queue_lock);
    wait_for_overlapping_aiocb(s, acb);
    QLIST_INSERT_HEAD(&s->inflight_aiocb_head, acb, aiocb_siblings);
+    qemu_co_mutex_unlock(&s->queue_lock);
 }

 static SocketAddress *sd_socket_address(const char *path,
@@ -785,6 +788,7 @@ static coroutine_fn void reconnect_to_sdog(void *opaque)
     * have to move all the inflight requests to the failed queue before
     * resend_aioreq() is called.
     */
+    qemu_co_mutex_lock(&s->queue_lock);
    QLIST_FOREACH_SAFE(aio_req, &s->inflight_aio_head, aio_siblings, next) {
        QLIST_REMOVE(aio_req, aio_siblings);
        QLIST_INSERT_HEAD(&s->failed_aio_head, aio_req, aio_siblings);
@@ -794,8 +798,11 @@ static coroutine_fn void reconnect_to_sdog(void *opaque)
    while (!QLIST_EMPTY(&s->failed_aio_head)) {
        aio_req = QLIST_FIRST(&s->failed_aio_head);
        QLIST_REMOVE(aio_req, aio_siblings);
+        qemu_co_mutex_unlock(&s->queue_lock);
        resend_aioreq(s, aio_req);
+        qemu_co_mutex_lock(&s->queue_lock);
    }
+    qemu_co_mutex_unlock(&s->queue_lock);
 }

 /*
@@ -887,7 +894,10 @@ static void coroutine_fn aio_read_response(void *opaque)
    */
    s->co_recv = NULL;

+    qemu_co_mutex_lock(&s->queue_lock);
    QLIST_REMOVE(aio_req, aio_siblings);
+    qemu_co_mutex_unlock(&s->queue_lock);
+
    switch (rsp.result) {
    case SD_RES_SUCCESS:
        break;
@@ -1046,11 +1056,11 @@ static void sd_parse_uri(SheepdogConfig *cfg, const char *filename,
    }

    /* transport */
-    if (!strcmp(uri->scheme, "sheepdog")) {
+    if (!g_strcmp0(uri->scheme, "sheepdog")) {
        is_unix = false;
-    } else if (!strcmp(uri->scheme, "sheepdog+tcp")) {
+    } else if (!g_strcmp0(uri->scheme, "sheepdog+tcp")) {
        is_unix = false;
-    } else if (!strcmp(uri->scheme, "sheepdog+unix")) {
+    } else if (!g_strcmp0(uri->scheme, "sheepdog+unix")) {
        is_unix = true;
    } else {
        error_setg(&err, "URI scheme must be 'sheepdog', 'sheepdog+tcp',"
@@ -1307,7 +1317,9 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
    uint64_t old_oid = aio_req->base_oid;
    bool create = aio_req->create;

+    qemu_co_mutex_lock(&s->queue_lock);
    QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
+    qemu_co_mutex_unlock(&s->queue_lock);

    if (!nr_copies) {
        error_report("bug");
@@ -1678,6 +1690,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
    bs->total_sectors = s->inode.vdi_size / BDRV_SECTOR_SIZE;
    pstrcpy(s->name, sizeof(s->name), vdi);
    qemu_co_mutex_init(&s->lock);
+    qemu_co_mutex_init(&s->queue_lock);
    qemu_co_queue_init(&s->overlapping_queue);
    qemu_opts_del(opts);
    g_free(buf);
@@ -2153,13 +2166,20 @@ static int64_t sd_getlength(BlockDriverState *bs)
    return s->inode.vdi_size;
 }

-static int sd_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
+static int sd_truncate(BlockDriverState *bs, int64_t offset,
+                       PreallocMode prealloc, Error **errp)
 {
    BDRVSheepdogState *s = bs->opaque;
    int ret, fd;
    unsigned int datalen;
    uint64_t max_vdi_size;

+    if (prealloc != PREALLOC_MODE_OFF) {
+        error_setg(errp, "Unsupported preallocation mode '%s'",
+                   PreallocMode_lookup[prealloc]);
+        return -ENOTSUP;
+    }
+
    max_vdi_size = (UINT64_C(1) << s->inode.block_size_shift) * MAX_DATA_OBJS;
    if (offset < s->inode.vdi_size) {
        error_setg(errp, "shrinking is not supported");
@@ -2431,12 +2451,16 @@ static void coroutine_fn sd_co_rw_vector(SheepdogAIOCB *acb)

 static void sd_aio_complete(SheepdogAIOCB *acb)
 {
+    BDRVSheepdogState *s;
    if (acb->aiocb_type == AIOCB_FLUSH_CACHE) {
        return;
    }

+    s = acb->s;
+    qemu_co_mutex_lock(&s->queue_lock);
    QLIST_REMOVE(acb, aiocb_siblings);
-    qemu_co_queue_restart_all(&acb->s->overlapping_queue);
+    qemu_co_queue_restart_all(&s->overlapping_queue);
+    qemu_co_mutex_unlock(&s->queue_lock);
 }

 static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
@@ -2448,7 +2472,7 @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
    BDRVSheepdogState *s = bs->opaque;

    if (offset > s->inode.vdi_size) {
-        ret = sd_truncate(bs, offset, NULL);
+        ret = sd_truncate(bs, offset, PREALLOC_MODE_OFF, NULL);
        if (ret < 0) {
            return ret;
        }
@@ -2935,7 +2959,7 @@ static int sd_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,


 static coroutine_fn int sd_co_pdiscard(BlockDriverState *bs, int64_t offset,
-                                      int count)
+                                      int bytes)
 {
    SheepdogAIOCB acb;
    BDRVSheepdogState *s = bs->opaque;
@@ -2953,11 +2977,11 @@ static coroutine_fn int sd_co_pdiscard(BlockDriverState *bs, int64_t offset,
    iov.iov_len = sizeof(zero);
    discard_iov.iov = &iov;
    discard_iov.niov = 1;
-    if (!QEMU_IS_ALIGNED(offset | count, BDRV_SECTOR_SIZE)) {
+    if (!QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE)) {
        return -ENOTSUP;
    }
    sd_aio_setup(&acb, s, &discard_iov, offset >> BDRV_SECTOR_BITS,
-                 count >> BDRV_SECTOR_BITS, AIOCB_DISCARD_OBJ);
+                 bytes >> BDRV_SECTOR_BITS, AIOCB_DISCARD_OBJ);
    sd_co_rw_vector(&acb);
    sd_aio_complete(&acb);

--- a/block/ssh.c
+++ b/block/ssh.c
@@ -204,7 +204,7 @@ static int parse_uri(const char *filename, QDict *options, Error **errp)
        return -EINVAL;
    }

-    if (strcmp(uri->scheme, "ssh") != 0) {
+    if (g_strcmp0(uri->scheme, "ssh") != 0) {
        error_setg(errp, "URI scheme must be 'ssh'");
        goto err;
    }
@@ -888,13 +888,22 @@ static int ssh_has_zero_init(BlockDriverState *bs)
    return has_zero_init;
 }

+typedef struct BDRVSSHRestart {
+    BlockDriverState *bs;
+    Coroutine *co;
+} BDRVSSHRestart;
+
 static void restart_coroutine(void *opaque)
 {
-    Coroutine *co = opaque;
+    BDRVSSHRestart *restart = opaque;
+    BlockDriverState *bs = restart->bs;
+    BDRVSSHState *s = bs->opaque;
+    AioContext *ctx = bdrv_get_aio_context(bs);

-    DPRINTF("co=%p", co);
+    DPRINTF("co=%p", restart->co);
+    aio_set_fd_handler(ctx, s->sock, false, NULL, NULL, NULL, NULL);

-    aio_co_wake(co);
+    aio_co_wake(restart->co);
 }

 /* A non-blocking call returned EAGAIN, so yield, ensuring the
@@ -905,7 +914,10 @@ static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
 {
    int r;
    IOHandler *rd_handler = NULL, *wr_handler = NULL;
-    Coroutine *co = qemu_coroutine_self();
+    BDRVSSHRestart restart = {
+        .bs = bs,
+        .co = qemu_coroutine_self()
+    };

    r = libssh2_session_block_directions(s->session);

@@ -920,11 +932,9 @@ static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
            rd_handler, wr_handler);

    aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
-                       false, rd_handler, wr_handler, NULL, co);
+                       false, rd_handler, wr_handler, NULL, &restart);
    qemu_coroutine_yield();
    DPRINTF("s->sock=%d - back", s->sock);
-    aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock, false,
-                       NULL, NULL, NULL, NULL);
 }

 /* SFTP has a function `libssh2_sftp_seek64' which seeks to a position
@@ -1114,8 +1124,8 @@ static coroutine_fn int ssh_co_writev(BlockDriverState *bs,
 static void unsafe_flush_warning(BDRVSSHState *s, const char *what)
 {
    if (!s->unsafe_flush_warning) {
-        error_report("warning: ssh server %s does not support fsync",
-                     s->inet->host);
+        warn_report("ssh server %s does not support fsync",
+                    s->inet->host);
        if (what) {
            error_report("to support fsync, you need %s", what);
        }
--- a/block/stream.c
+++ b/block/stream.c
@@ -41,25 +41,24 @@ typedef struct StreamBlockJob {
 } StreamBlockJob;

 static int coroutine_fn stream_populate(BlockBackend *blk,
-                                        int64_t sector_num, int nb_sectors,
+                                        int64_t offset, uint64_t bytes,
                                        void *buf)
 {
    struct iovec iov = {
        .iov_base = buf,
-        .iov_len  = nb_sectors * BDRV_SECTOR_SIZE,
+        .iov_len  = bytes,
    };
    QEMUIOVector qiov;

+    assert(bytes < SIZE_MAX);
    qemu_iovec_init_external(&qiov, &iov, 1);

    /* Copy-on-read the unallocated clusters */
-    return blk_co_preadv(blk, sector_num * BDRV_SECTOR_SIZE, qiov.size, &qiov,
-                         BDRV_REQ_COPY_ON_READ);
+    return blk_co_preadv(blk, offset, qiov.size, &qiov, BDRV_REQ_COPY_ON_READ);
 }

 typedef struct {
    int ret;
-    bool reached_end;
 } StreamCompleteData;

 static void stream_complete(BlockJob *job, void *opaque)
@@ -70,7 +69,7 @@ static void stream_complete(BlockJob *job, void *opaque)
    BlockDriverState *base = s->base;
    Error *local_err = NULL;

-    if (!block_job_is_cancelled(&s->common) && data->reached_end &&
+    if (!block_job_is_cancelled(&s->common) && bs->backing &&
        data->ret == 0) {
        const char *base_id = NULL, *base_fmt = NULL;
        if (base) {
@@ -108,12 +107,11 @@ static void coroutine_fn stream_run(void *opaque)
    BlockBackend *blk = s->common.blk;
    BlockDriverState *bs = blk_bs(blk);
    BlockDriverState *base = s->base;
-    int64_t sector_num = 0;
-    int64_t end = -1;
+    int64_t offset = 0;
    uint64_t delay_ns = 0;
    int error = 0;
    int ret = 0;
-    int n = 0;
+    int64_t n = 0; /* bytes */
    void *buf;

    if (!bs->backing) {
@@ -126,7 +124,6 @@ static void coroutine_fn stream_run(void *opaque)
        goto out;
    }

-    end = s->common.len >> BDRV_SECTOR_BITS;
    buf = qemu_blockalign(bs, STREAM_BUFFER_SIZE);

    /* Turn on copy-on-read for the whole block device so that guest read
@@ -138,7 +135,7 @@ static void coroutine_fn stream_run(void *opaque)
        bdrv_enable_copy_on_read(bs);
    }

-    for (sector_num = 0; sector_num < end; sector_num += n) {
+    for ( ; offset < s->common.len; offset += n) {
        bool copy;

        /* Note that even when no rate limit is applied we need to yield
@@ -151,26 +148,25 @@ static void coroutine_fn stream_run(void *opaque)

        copy = false;

-        ret = bdrv_is_allocated(bs, sector_num,
-                                STREAM_BUFFER_SIZE / BDRV_SECTOR_SIZE, &n);
+        ret = bdrv_is_allocated(bs, offset, STREAM_BUFFER_SIZE, &n);
        if (ret == 1) {
            /* Allocated in the top, no need to copy.  */
        } else if (ret >= 0) {
            /* Copy if allocated in the intermediate images.  Limit to the
-             * known-unallocated area [sector_num, sector_num+n).  */
+             * known-unallocated area [offset, offset+n*BDRV_SECTOR_SIZE).  */
            ret = bdrv_is_allocated_above(backing_bs(bs), base,
-                                          sector_num, n, &n);
+                                          offset, n, &n);

            /* Finish early if end of backing file has been reached */
            if (ret == 0 && n == 0) {
-                n = end - sector_num;
+                n = s->common.len - offset;
            }

            copy = (ret == 1);
        }
-        trace_stream_one_iteration(s, sector_num, n, ret);
+        trace_stream_one_iteration(s, offset, n, ret);
        if (copy) {
-            ret = stream_populate(blk, sector_num, n, buf);
+            ret = stream_populate(blk, offset, n, buf);
        }
        if (ret < 0) {
            BlockErrorAction action =
@@ -189,7 +185,7 @@ static void coroutine_fn stream_run(void *opaque)
        ret = 0;

        /* Publish progress */
-        s->common.offset += n * BDRV_SECTOR_SIZE;
+        s->common.offset += n;
        if (copy && s->common.speed) {
            delay_ns = ratelimit_calculate_delay(&s->limit, n);
        }
@@ -208,7 +204,6 @@ out:
    /* Modify backing chain and close BDSes in main loop */
    data = g_malloc(sizeof(*data));
    data->ret = ret;
-    data->reached_end = sector_num == end;
    block_job_defer_to_main_loop(&s->common, stream_complete, data);
 }

@@ -220,7 +215,7 @@ static void stream_set_speed(BlockJob *job, int64_t speed, Error **errp)
        error_setg(errp, QERR_INVALID_PARAMETER, "speed");
        return;
    }
-    ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
+    ratelimit_set_speed(&s->limit, speed, SLICE_TIME);
 }

 static const BlockJobDriver stream_job_driver = {
--- a/block/throttle-groups.c
+++ b/block/throttle-groups.c
@@ -49,7 +49,7 @@
 * Again, all this is handled internally and is mostly transparent to
 * the outside. The 'throttle_timers' field however has an additional
 * constraint because it may be temporarily invalid (see for example
- * bdrv_set_aio_context()). Therefore in this file a thread will
+ * blk_set_aio_context()). Therefore in this file a thread will
 * access some other BlockBackend's timers only after verifying that
 * that BlockBackend has throttled requests in the queue.
 */
@@ -61,6 +61,7 @@ typedef struct ThrottleGroup {
    QLIST_HEAD(, BlockBackendPublic) head;
    BlockBackend *tokens[2];
    bool any_timer_armed[2];
+    QEMUClockType clock_type;

    /* These two are protected by the global throttle_groups_lock */
    unsigned refcount;
@@ -98,6 +99,12 @@ ThrottleState *throttle_group_incref(const char *name)
    if (!tg) {
        tg = g_new0(ThrottleGroup, 1);
        tg->name = g_strdup(name);
+        tg->clock_type = QEMU_CLOCK_REALTIME;
+
+        if (qtest_enabled()) {
+            /* For testing block IO throttling only */
+            tg->clock_type = QEMU_CLOCK_VIRTUAL;
+        }
        qemu_mutex_init(&tg->lock);
        throttle_init(&tg->ts);
        QLIST_INIT(&tg->head);
@@ -310,7 +317,7 @@ static void schedule_next_request(BlockBackend *blk, bool is_write)
            token = blk;
        } else {
            ThrottleTimers *tt = &blk_get_public(token)->throttle_timers;
-            int64_t now = qemu_clock_get_ns(tt->clock_type);
+            int64_t now = qemu_clock_get_ns(tg->clock_type);
            timer_mod(tt->timers[is_write], now);
            tg->any_timer_armed[is_write] = true;
        }
@@ -419,18 +426,10 @@ void throttle_group_restart_blk(BlockBackend *blk)
 void throttle_group_config(BlockBackend *blk, ThrottleConfig *cfg)
 {
    BlockBackendPublic *blkp = blk_get_public(blk);
-    ThrottleTimers *tt = &blkp->throttle_timers;
    ThrottleState *ts = blkp->throttle_state;
    ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
    qemu_mutex_lock(&tg->lock);
-    /* throttle_config() cancels the timers */
-    if (timer_pending(tt->timers[0])) {
-        tg->any_timer_armed[0] = false;
-    }
-    if (timer_pending(tt->timers[1])) {
-        tg->any_timer_armed[1] = false;
-    }
-    throttle_config(ts, tt, cfg);
+    throttle_config(ts, tg->clock_type, cfg);
    qemu_mutex_unlock(&tg->lock);

    throttle_group_restart_blk(blk);
@@ -497,13 +496,6 @@ void throttle_group_register_blk(BlockBackend *blk, const char *groupname)
    BlockBackendPublic *blkp = blk_get_public(blk);
    ThrottleState *ts = throttle_group_incref(groupname);
    ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
-    int clock_type = QEMU_CLOCK_REALTIME;
-
-    if (qtest_enabled()) {
-        /* For testing block IO throttling only */
-        clock_type = QEMU_CLOCK_VIRTUAL;
-    }
-
    blkp->throttle_state = ts;

    qemu_mutex_lock(&tg->lock);
@@ -518,7 +510,7 @@ void throttle_group_register_blk(BlockBackend *blk, const char *groupname)

    throttle_timers_init(&blkp->throttle_timers,
                         blk_get_aio_context(blk),
-                         clock_type,
+                         tg->clock_type,
                         read_timer_cb,
                         write_timer_cb,
                         blk);
--- a/block/trace-events
+++ b/block/trace-events
@@ -9,20 +9,17 @@ blk_co_preadv(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags
 blk_co_pwritev(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %u flags %x"

 # block/io.c
-bdrv_aio_flush(void *bs, void *opaque) "bs %p opaque %p"
-bdrv_aio_readv(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
-bdrv_aio_writev(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
 bdrv_co_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
 bdrv_co_writev(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
 bdrv_co_pwrite_zeroes(void *bs, int64_t offset, int count, int flags) "bs %p offset %"PRId64" count %d flags %#x"
 bdrv_co_do_copy_on_readv(void *bs, int64_t offset, unsigned int bytes, int64_t cluster_offset, unsigned int cluster_bytes) "bs %p offset %"PRId64" bytes %u cluster_offset %"PRId64" cluster_bytes %u"

 # block/stream.c
-stream_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d"
+stream_one_iteration(void *s, int64_t offset, uint64_t bytes, int is_allocated) "s %p offset %" PRId64 " bytes %" PRIu64 " is_allocated %d"
 stream_start(void *bs, void *base, void *s) "bs %p base %p s %p"

 # block/commit.c
-commit_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d"
+commit_one_iteration(void *s, int64_t offset, uint64_t bytes, int is_allocated) "s %p offset %" PRId64 " bytes %" PRIu64 " is_allocated %d"
 commit_start(void *bs, void *base, void *top, void *s) "bs %p base %p top %p s %p"

 # block/mirror.c
@@ -31,14 +28,14 @@ mirror_restart_iter(void *s, int64_t cnt) "s %p dirty count %"PRId64
 mirror_before_flush(void *s) "s %p"
 mirror_before_drain(void *s, int64_t cnt) "s %p dirty count %"PRId64
 mirror_before_sleep(void *s, int64_t cnt, int synced, uint64_t delay_ns) "s %p dirty count %"PRId64" synced %d delay %"PRIu64"ns"
-mirror_one_iteration(void *s, int64_t sector_num, int nb_sectors) "s %p sector_num %"PRId64" nb_sectors %d"
-mirror_iteration_done(void *s, int64_t sector_num, int nb_sectors, int ret) "s %p sector_num %"PRId64" nb_sectors %d ret %d"
+mirror_one_iteration(void *s, int64_t offset, uint64_t bytes) "s %p offset %" PRId64 " bytes %" PRIu64
+mirror_iteration_done(void *s, int64_t offset, uint64_t bytes, int ret) "s %p offset %" PRId64 " bytes %" PRIu64 " ret %d"
 mirror_yield(void *s, int64_t cnt, int buf_free_count, int in_flight) "s %p dirty count %"PRId64" free buffers %d in_flight %d"
-mirror_yield_in_flight(void *s, int64_t sector_num, int in_flight) "s %p sector_num %"PRId64" in_flight %d"
+mirror_yield_in_flight(void *s, int64_t offset, int in_flight) "s %p offset %" PRId64 " in_flight %d"

 # block/backup.c
-backup_do_cow_enter(void *job, int64_t start, int64_t sector_num, int nb_sectors) "job %p start %"PRId64" sector_num %"PRId64" nb_sectors %d"
-backup_do_cow_return(void *job, int64_t sector_num, int nb_sectors, int ret) "job %p sector_num %"PRId64" nb_sectors %d ret %d"
+backup_do_cow_enter(void *job, int64_t start, int64_t offset, uint64_t bytes) "job %p start %" PRId64 " offset %" PRId64 " bytes %" PRIu64
+backup_do_cow_return(void *job, int64_t offset, uint64_t bytes, int ret) "job %p offset %" PRId64 " bytes %" PRIu64 " ret %d"
 backup_do_cow_skip(void *job, int64_t start) "job %p start %"PRId64
 backup_do_cow_process(void *job, int64_t start) "job %p start %"PRId64
 backup_do_cow_read_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -172,7 +172,7 @@ typedef struct {
    /* VDI header (converted to host endianness). */
    VdiHeader header;

-    CoMutex write_lock;
+    CoRwlock bmap_lock;

    Error *migration_blocker;
 } BDRVVdiState;
@@ -485,7 +485,7 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
        goto fail_free_bmap;
    }

-    qemu_co_mutex_init(&s->write_lock);
+    qemu_co_rwlock_init(&s->bmap_lock);

    return 0;

@@ -557,7 +557,9 @@ vdi_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
               n_bytes, offset);

        /* prepare next AIO request */
+        qemu_co_rwlock_rdlock(&s->bmap_lock);
        bmap_entry = le32_to_cpu(s->bmap[block_index]);
+        qemu_co_rwlock_unlock(&s->bmap_lock);
        if (!VDI_IS_ALLOCATED(bmap_entry)) {
            /* Block not allocated, return zeros, no need to wait. */
            qemu_iovec_memset(qiov, bytes_done, 0, n_bytes);
@@ -595,6 +597,7 @@ vdi_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
    uint32_t block_index;
    uint32_t offset_in_block;
    uint32_t n_bytes;
+    uint64_t data_offset;
    uint32_t bmap_first = VDI_UNALLOCATED;
    uint32_t bmap_last = VDI_UNALLOCATED;
    uint8_t *block = NULL;
@@ -614,10 +617,19 @@ vdi_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
               n_bytes, offset);

        /* prepare next AIO request */
+        qemu_co_rwlock_rdlock(&s->bmap_lock);
        bmap_entry = le32_to_cpu(s->bmap[block_index]);
        if (!VDI_IS_ALLOCATED(bmap_entry)) {
            /* Allocate new block and write to it. */
            uint64_t data_offset;
+            qemu_co_rwlock_upgrade(&s->bmap_lock);
+            bmap_entry = le32_to_cpu(s->bmap[block_index]);
+            if (VDI_IS_ALLOCATED(bmap_entry)) {
+                /* A concurrent allocation did the work for us.  */
+                qemu_co_rwlock_downgrade(&s->bmap_lock);
+                goto nonallocating_write;
+            }
+
            bmap_entry = s->header.blocks_allocated;
            s->bmap[block_index] = cpu_to_le32(bmap_entry);
            s->header.blocks_allocated++;
@@ -635,30 +647,18 @@ vdi_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
            memset(block + offset_in_block + n_bytes, 0,
                   s->block_size - n_bytes - offset_in_block);

-            /* Note that this coroutine does not yield anywhere from reading the
-             * bmap entry until here, so in regards to all the coroutines trying
-             * to write to this cluster, the one doing the allocation will
-             * always be the first to try to acquire the lock.
-             * Therefore, it is also the first that will actually be able to
-             * acquire the lock and thus the padded cluster is written before
-             * the other coroutines can write to the affected area. */
-            qemu_co_mutex_lock(&s->write_lock);
+            /* Write the new block under CoRwLock write-side protection,
+             * so this full-cluster write does not overlap a partial write
+             * of the same cluster, issued from the "else" branch.
+             */
            ret = bdrv_pwrite(bs->file, data_offset, block, s->block_size);
-            qemu_co_mutex_unlock(&s->write_lock);
+            qemu_co_rwlock_unlock(&s->bmap_lock);
        } else {
-            uint64_t data_offset = s->header.offset_data +
-                                   (uint64_t)bmap_entry * s->block_size +
-                                   offset_in_block;
-            qemu_co_mutex_lock(&s->write_lock);
-            /* This lock is only used to make sure the following write operation
-             * is executed after the write issued by the coroutine allocating
-             * this cluster, therefore we do not need to keep it locked.
-             * As stated above, the allocating coroutine will always try to lock
-             * the mutex before all the other concurrent accesses to that
-             * cluster, therefore at this point we can be absolutely certain
-             * that that write operation has returned (there may be other writes
-             * in flight, but they do not concern this very operation). */
-            qemu_co_mutex_unlock(&s->write_lock);
+nonallocating_write:
+            data_offset = s->header.offset_data +
+                           (uint64_t)bmap_entry * s->block_size +
+                           offset_in_block;
+            qemu_co_rwlock_unlock(&s->bmap_lock);

            qemu_iovec_reset(&local_qiov);
            qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
@@ -832,7 +832,8 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
    }

    if (image_type == VDI_TYPE_STATIC) {
-        ret = blk_truncate(blk, offset + blocks * block_size, errp);
+        ret = blk_truncate(blk, offset + blocks * block_size,
+                           PREALLOC_MODE_OFF, errp);
        if (ret < 0) {
            error_prepend(errp, "Failed to statically allocate %s", filename);
            goto exit;
--- a/block/vhdx-log.c
+++ b/block/vhdx-log.c
@@ -548,7 +548,7 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
            if (new_file_size % (1024*1024)) {
                /* round up to nearest 1MB boundary */
                new_file_size = ((new_file_size >> 20) + 1) << 20;
-                bdrv_truncate(bs->file, new_file_size, NULL);
+                bdrv_truncate(bs->file, new_file_size, PREALLOC_MODE_OFF, NULL);
            }
        }
        qemu_vfree(desc_entries);
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -1171,7 +1171,8 @@ static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s,
    /* per the spec, the address for a block is in units of 1MB */
    *new_offset = ROUND_UP(*new_offset, 1024 * 1024);

-    return bdrv_truncate(bs->file, *new_offset + s->block_size, NULL);
+    return bdrv_truncate(bs->file, *new_offset + s->block_size,
+                         PREALLOC_MODE_OFF, NULL);
 }

 /*
@@ -1607,12 +1608,13 @@ static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s,
    if (type == VHDX_TYPE_DYNAMIC) {
        /* All zeroes, so we can just extend the file - the end of the BAT
         * is the furthest thing we have written yet */
-        ret = blk_truncate(blk, data_file_offset, errp);
+        ret = blk_truncate(blk, data_file_offset, PREALLOC_MODE_OFF, errp);
        if (ret < 0) {
            goto exit;
        }
    } else if (type == VHDX_TYPE_FIXED) {
-        ret = blk_truncate(blk, data_file_offset + image_size, errp);
+        ret = blk_truncate(blk, data_file_offset + image_size,
+                           PREALLOC_MODE_OFF, errp);
        if (ret < 0) {
            goto exit;
        }
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -242,10 +242,11 @@ static void vmdk_free_last_extent(BlockDriverState *bs)
    s->extents = g_renew(VmdkExtent, s->extents, s->num_extents);
 }

-static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
+/* Return -ve errno, or 0 on success and write CID into *pcid. */
+static int vmdk_read_cid(BlockDriverState *bs, int parent, uint32_t *pcid)
 {
    char *desc;
-    uint32_t cid = 0xffffffff;
+    uint32_t cid;
    const char *p_name, *cid_str;
    size_t cid_str_size;
    BDRVVmdkState *s = bs->opaque;
@@ -254,8 +255,7 @@ static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
    desc = g_malloc0(DESC_SIZE);
    ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
    if (ret < 0) {
-        g_free(desc);
-        return 0;
+        goto out;
    }

    if (parent) {
@@ -268,13 +268,21 @@ static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)

    desc[DESC_SIZE - 1] = '\0';
    p_name = strstr(desc, cid_str);
-    if (p_name != NULL) {
-        p_name += cid_str_size;
-        sscanf(p_name, "%" SCNx32, &cid);
+    if (p_name == NULL) {
+        ret = -EINVAL;
+        goto out;
    }
+    p_name += cid_str_size;
+    if (sscanf(p_name, "%" SCNx32, &cid) != 1) {
+        ret = -EINVAL;
+        goto out;
+    }
+    *pcid = cid;
+    ret = 0;

+out:
    g_free(desc);
-    return cid;
+    return ret;
 }

 static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
@@ -322,7 +330,10 @@ static int vmdk_is_cid_valid(BlockDriverState *bs)
    if (!s->cid_checked && bs->backing) {
        BlockDriverState *p_bs = bs->backing->bs;

-        cur_pcid = vmdk_read_cid(p_bs, 0);
+        if (vmdk_read_cid(p_bs, 0, &cur_pcid) != 0) {
+            /* read failure: report as not valid */
+            return 0;
+        }
        if (s->parent_cid != cur_pcid) {
            /* CID not valid */
            return 0;
@@ -975,8 +986,14 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
    if (ret) {
        goto fail;
    }
-    s->cid = vmdk_read_cid(bs, 0);
-    s->parent_cid = vmdk_read_cid(bs, 1);
+    ret = vmdk_read_cid(bs, 0, &s->cid);
+    if (ret) {
+        goto fail;
+    }
+    ret = vmdk_read_cid(bs, 1, &s->parent_cid);
+    if (ret) {
+        goto fail;
+    }
    qemu_co_mutex_init(&s->lock);

    /* Disable migration when VMDK images are used */
@@ -1714,7 +1731,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
    blk_set_allow_write_beyond_eof(blk, true);

    if (flat) {
-        ret = blk_truncate(blk, filesize, errp);
+        ret = blk_truncate(blk, filesize, PREALLOC_MODE_OFF, errp);
        goto exit;
    }
    magic = cpu_to_be32(VMDK4_MAGIC);
@@ -1777,7 +1794,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
        goto exit;
    }

-    ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9, errp);
+    ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9,
+                       PREALLOC_MODE_OFF, errp);
    if (ret < 0) {
        goto exit;
    }
@@ -2007,8 +2025,11 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
            ret = -EINVAL;
            goto exit;
        }
-        parent_cid = vmdk_read_cid(blk_bs(blk), 0);
+        ret = vmdk_read_cid(blk_bs(blk), 0, &parent_cid);
        blk_unref(blk);
+        if (ret) {
+            goto exit;
+        }
        snprintf(parent_desc_line, BUF_SIZE,
                "parentFileNameHint=\"%s\"", backing_file);
    }
@@ -2086,7 +2107,7 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
    /* bdrv_pwrite write padding zeros to align to sector, we don't need that
     * for description file */
    if (desc_offset == 0) {
-        ret = blk_truncate(new_blk, desc_len, errp);
+        ret = blk_truncate(new_blk, desc_len, PREALLOC_MODE_OFF, errp);
    }
 exit:
    if (new_blk) {
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -460,17 +460,23 @@ static int vpc_reopen_prepare(BDRVReopenState *state,
 /*
 * Returns the absolute byte offset of the given sector in the image file.
 * If the sector is not allocated, -1 is returned instead.
+ * If an error occurred trying to write an updated block bitmap back to
+ * the file, -2 is returned, and the error value is written to *err.
+ * This can only happen for a write operation.
 *
 * The parameter write must be 1 if the offset will be used for a write
 * operation (the block bitmaps is updated then), 0 otherwise.
+ * If write is true then err must not be NULL.
 */
 static inline int64_t get_image_offset(BlockDriverState *bs, uint64_t offset,
-                                       bool write)
+                                       bool write, int *err)
 {
    BDRVVPCState *s = bs->opaque;
    uint64_t bitmap_offset, block_offset;
    uint32_t pagetable_index, offset_in_block;

+    assert(!(write && err == NULL));
+
    pagetable_index = offset / s->block_size;
    offset_in_block = offset % s->block_size;

@@ -487,21 +493,20 @@ static inline int64_t get_image_offset(BlockDriverState *bs, uint64_t offset,
       correctness. */
    if (write && (s->last_bitmap_offset != bitmap_offset)) {
        uint8_t bitmap[s->bitmap_size];
+        int r;

        s->last_bitmap_offset = bitmap_offset;
        memset(bitmap, 0xff, s->bitmap_size);
-        bdrv_pwrite_sync(bs->file, bitmap_offset, bitmap, s->bitmap_size);
+        r = bdrv_pwrite_sync(bs->file, bitmap_offset, bitmap, s->bitmap_size);
+        if (r < 0) {
+            *err = r;
+            return -2;
+        }
    }

    return block_offset;
 }

-static inline int64_t get_sector_offset(BlockDriverState *bs,
-                                        int64_t sector_num, bool write)
-{
-    return get_image_offset(bs, sector_num * BDRV_SECTOR_SIZE, write);
-}
-
 /*
 * Writes the footer to the end of the image file. This is needed when the
 * file grows as it overwrites the old footer
@@ -567,7 +572,7 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t offset)
    if (ret < 0)
        goto fail;

-    return get_image_offset(bs, offset, false);
+    return get_image_offset(bs, offset, false, NULL);

 fail:
    s->free_data_block_offset -= (s->block_size + s->bitmap_size);
@@ -607,7 +612,7 @@ vpc_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
    qemu_iovec_init(&local_qiov, qiov->niov);

    while (bytes > 0) {
-        image_offset = get_image_offset(bs, offset, false);
+        image_offset = get_image_offset(bs, offset, false, NULL);
        n_bytes = MIN(bytes, s->block_size - (offset % s->block_size));

        if (image_offset == -1) {
@@ -656,7 +661,11 @@ vpc_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
    qemu_iovec_init(&local_qiov, qiov->niov);

    while (bytes > 0) {
-        image_offset = get_image_offset(bs, offset, true);
+        image_offset = get_image_offset(bs, offset, true, &ret);
+        if (image_offset == -2) {
+            /* Failed to write block bitmap: can't proceed with write */
+            goto fail;
+        }
        n_bytes = MIN(bytes, s->block_size - (offset % s->block_size));

        if (image_offset == -1) {
@@ -696,19 +705,23 @@ static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs,
    VHDFooter *footer = (VHDFooter*) s->footer_buf;
    int64_t start, offset;
    bool allocated;
+    int64_t ret;
    int n;

    if (be32_to_cpu(footer->type) == VHD_FIXED) {
        *pnum = nb_sectors;
        *file = bs->file->bs;
-        return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
+        return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID |
               (sector_num << BDRV_SECTOR_BITS);
    }

-    offset = get_sector_offset(bs, sector_num, 0);
+    qemu_co_mutex_lock(&s->lock);
+
+    offset = get_image_offset(bs, sector_num << BDRV_SECTOR_BITS, false, NULL);
    start = offset;
    allocated = (offset != -1);
    *pnum = 0;
+    ret = 0;

    do {
        /* All sectors in a block are contiguous (without using the bitmap) */
@@ -723,15 +736,18 @@ static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs,
         * sectors since there is always a bitmap in between. */
        if (allocated) {
            *file = bs->file->bs;
-            return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
+            ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
+            break;
        }
        if (nb_sectors == 0) {
            break;
        }
-        offset = get_sector_offset(bs, sector_num, 0);
+        offset = get_image_offset(bs, sector_num << BDRV_SECTOR_BITS, false,
+                                  NULL);
    } while (offset == -1);

-    return 0;
+    qemu_co_mutex_unlock(&s->lock);
+    return ret;
 }

 /*
@@ -858,7 +874,7 @@ static int create_fixed_disk(BlockBackend *blk, uint8_t *buf,
    /* Add footer to total size */
    total_size += HEADER_SIZE;

-    ret = blk_truncate(blk, total_size, errp);
+    ret = blk_truncate(blk, total_size, PREALLOC_MODE_OFF, errp);
    if (ret < 0) {
        return ret;
    }
--- a/block/vvfat.c
+++ b/block/vvfat.c
--- a/blockdev.c
+++ b/blockdev.c
@@ -50,6 +50,7 @@
 #include "qmp-commands.h"
 #include "block/trace.h"
 #include "sysemu/arch_init.h"
+#include "sysemu/qtest.h"
 #include "qemu/cutils.h"
 #include "qemu/help_option.h"
 #include "qemu/throttle-options.h"
@@ -592,10 +593,6 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,

        bs->detect_zeroes = detect_zeroes;

-        if (bdrv_key_required(bs)) {
-            autostart = 0;
-        }
-
        block_acct_setup(blk_get_stats(blk), account_invalid, account_failed);

        if (!parse_stats_intervals(blk_get_stats(blk), interval_list, errp)) {
@@ -798,6 +795,9 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type)
    const char *filename;
    Error *local_err = NULL;
    int i;
+    const char *deprecated[] = {
+        "serial", "trans", "secs", "heads", "cyls", "addr"
+    };

    /* Change legacy command line options into QMP ones */
    static const struct {
@@ -881,6 +881,16 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type)
                "update your scripts.\n");
    }

+    /* Other deprecated options */
+    if (!qtest_enabled()) {
+        for (i = 0; i < ARRAY_SIZE(deprecated); i++) {
+            if (qemu_opt_get(legacy_opts, deprecated[i]) != NULL) {
+                error_report("'%s' is deprecated, please use the corresponding "
+                             "option of '-device' instead", deprecated[i]);
+            }
+        }
+    }
+
    /* Media type */
    value = qemu_opt_get(legacy_opts, "media");
    if (value) {
@@ -900,7 +910,7 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type)
    copy_on_read = qemu_opt_get_bool(legacy_opts, "copy-on-read", false);

    if (read_only && copy_on_read) {
-        error_report("warning: disabling copy-on-read on read-only drive");
+        warn_report("disabling copy-on-read on read-only drive");
        copy_on_read = false;
    }

@@ -1700,7 +1710,8 @@ static void external_snapshot_prepare(BlkActionState *common,
        }

        flags = state->old_bs->open_flags;
-        flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
+        flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_COPY_ON_READ);
+        flags |= BDRV_O_NO_BACKING;

        /* create new image w/backing file */
        mode = s->has_mode ? s->mode : NEW_IMAGE_MODE_ABSOLUTE_PATHS;
@@ -1725,8 +1736,6 @@ static void external_snapshot_prepare(BlkActionState *common,
            qdict_put_str(options, "node-name", snapshot_node_name);
        }
        qdict_put_str(options, "driver", format);
-
-        flags |= BDRV_O_NO_BACKING;
    }

    state->new_bs = bdrv_open(new_image_file, snapshot_ref, options, flags,
@@ -1973,6 +1982,8 @@ static void block_dirty_bitmap_add_prepare(BlkActionState *common,
    /* AIO context taken and released within qmp_block_dirty_bitmap_add */
    qmp_block_dirty_bitmap_add(action->node, action->name,
                               action->has_granularity, action->granularity,
+                               action->has_persistent, action->persistent,
+                               action->has_autoload, action->autoload,
                               &local_err);

    if (!local_err) {
@@ -2023,6 +2034,9 @@ static void block_dirty_bitmap_clear_prepare(BlkActionState *common,
    } else if (!bdrv_dirty_bitmap_enabled(state->bitmap)) {
        error_setg(errp, "Cannot clear a disabled bitmap");
        return;
+    } else if (bdrv_dirty_bitmap_readonly(state->bitmap)) {
+        error_setg(errp, "Cannot clear a readonly bitmap");
+        return;
    }

    bdrv_clear_dirty_bitmap(state->bitmap, &state->backup);
@@ -2251,24 +2265,8 @@ void qmp_block_passwd(bool has_device, const char *device,
                      bool has_node_name, const char *node_name,
                      const char *password, Error **errp)
 {
-    Error *local_err = NULL;
-    BlockDriverState *bs;
-    AioContext *aio_context;
-
-    bs = bdrv_lookup_bs(has_device ? device : NULL,
-                        has_node_name ? node_name : NULL,
-                        &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        return;
-    }
-
-    aio_context = bdrv_get_aio_context(bs);
-    aio_context_acquire(aio_context);
-
-    bdrv_add_key(bs, password, errp);
-
-    aio_context_release(aio_context);
+    error_setg(errp,
+               "Setting block passwords directly is no longer supported");
 }

 /*
@@ -2577,12 +2575,6 @@ void qmp_blockdev_change_medium(bool has_device, const char *device,
        goto fail;
    }

-    bdrv_add_key(medium_bs, NULL, &err);
-    if (err) {
-        error_propagate(errp, err);
-        goto fail;
-    }
-
    rc = do_open_tray(has_device ? device : NULL,
                      has_id ? id : NULL,
                      false, &err);
@@ -2717,9 +2709,12 @@ out:

 void qmp_block_dirty_bitmap_add(const char *node, const char *name,
                                bool has_granularity, uint32_t granularity,
+                                bool has_persistent, bool persistent,
+                                bool has_autoload, bool autoload,
                                Error **errp)
 {
    BlockDriverState *bs;
+    BdrvDirtyBitmap *bitmap;

    if (!name || name[0] == '\0') {
        error_setg(errp, "Bitmap name cannot be empty");
@@ -2742,7 +2737,32 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name,
        granularity = bdrv_get_default_bitmap_granularity(bs);
    }

-    bdrv_create_dirty_bitmap(bs, granularity, name, errp);
+    if (!has_persistent) {
+        persistent = false;
+    }
+    if (!has_autoload) {
+        autoload = false;
+    }
+
+    if (has_autoload && !persistent) {
+        error_setg(errp, "Autoload flag must be used only for persistent "
+                         "bitmaps");
+        return;
+    }
+
+    if (persistent &&
+        !bdrv_can_store_new_dirty_bitmap(bs, name, granularity, errp))
+    {
+        return;
+    }
+
+    bitmap = bdrv_create_dirty_bitmap(bs, granularity, name, errp);
+    if (bitmap == NULL) {
+        return;
+    }
+
+    bdrv_dirty_bitmap_set_persistance(bitmap, persistent);
+    bdrv_dirty_bitmap_set_autoload(bitmap, autoload);
 }

 void qmp_block_dirty_bitmap_remove(const char *node, const char *name,
@@ -2750,6 +2770,7 @@ void qmp_block_dirty_bitmap_remove(const char *node, const char *name,
 {
    BlockDriverState *bs;
    BdrvDirtyBitmap *bitmap;
+    Error *local_err = NULL;

    bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp);
    if (!bitmap || !bs) {
@@ -2762,6 +2783,15 @@ void qmp_block_dirty_bitmap_remove(const char *node, const char *name,
                   name);
        return;
    }
+
+    if (bdrv_dirty_bitmap_get_persistance(bitmap)) {
+        bdrv_remove_persistent_dirty_bitmap(bs, name, &local_err);
+        if (local_err != NULL) {
+            error_propagate(errp, local_err);
+            return;
+        }
+    }
+
    bdrv_dirty_bitmap_make_anon(bitmap);
    bdrv_release_dirty_bitmap(bs, bitmap);
 }
@@ -2791,11 +2821,39 @@ void qmp_block_dirty_bitmap_clear(const char *node, const char *name,
                   "Bitmap '%s' is currently disabled and cannot be cleared",
                   name);
        return;
+    } else if (bdrv_dirty_bitmap_readonly(bitmap)) {
+        error_setg(errp, "Bitmap '%s' is readonly and cannot be cleared", name);
+        return;
    }

    bdrv_clear_dirty_bitmap(bitmap, NULL);
 }

+BlockDirtyBitmapSha256 *qmp_x_debug_block_dirty_bitmap_sha256(const char *node,
+                                                              const char *name,
+                                                              Error **errp)
+{
+    BdrvDirtyBitmap *bitmap;
+    BlockDriverState *bs;
+    BlockDirtyBitmapSha256 *ret = NULL;
+    char *sha256;
+
+    bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp);
+    if (!bitmap || !bs) {
+        return NULL;
+    }
+
+    sha256 = bdrv_dirty_bitmap_sha256(bitmap, errp);
+    if (sha256 == NULL) {
+        return NULL;
+    }
+
+    ret = g_new(BlockDirtyBitmapSha256, 1);
+    ret->sha256 = sha256;
+
+    return ret;
+}
+
 void hmp_drive_del(Monitor *mon, const QDict *qdict)
 {
    const char *id = qdict_get_str(qdict, "id");
@@ -2899,7 +2957,7 @@ void qmp_block_resize(bool has_device, const char *device,
    }

    bdrv_drained_begin(bs);
-    ret = blk_truncate(blk, size, errp);
+    ret = blk_truncate(blk, size, PREALLOC_MODE_OFF, errp);
    bdrv_drained_end(bs);

 out:
@@ -3489,6 +3547,9 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
        backing_mode = MIRROR_OPEN_BACKING_CHAIN;
    }

+    /* Don't open backing image in create() */
+    flags |= BDRV_O_NO_BACKING;
+
    if ((arg->sync == MIRROR_SYNC_MODE_FULL || !source)
        && arg->mode != NEW_IMAGE_MODE_EXISTING)
    {
@@ -3528,8 +3589,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
    /* Mirroring takes care of copy-on-write using the source's backing
     * file.
     */
-    target_bs = bdrv_open(arg->target, NULL, options,
-                          flags | BDRV_O_NO_BACKING, errp);
+    target_bs = bdrv_open(arg->target, NULL, options, flags, errp);
    if (!target_bs) {
        goto out;
    }
@@ -3852,13 +3912,6 @@ void qmp_blockdev_add(BlockdevOptions *options, Error **errp)

    QTAILQ_INSERT_TAIL(&monitor_bdrv_states, bs, monitor_list);

-    if (bs && bdrv_key_required(bs)) {
-        QTAILQ_REMOVE(&monitor_bdrv_states, bs, monitor_list);
-        bdrv_unref(bs);
-        error_setg(errp, "blockdev-add doesn't support encrypted devices");
-        goto fail;
-    }
-
 fail:
    visit_free(v);
 }
--- a/blockjob.c
+++ b/blockjob.c
@@ -139,7 +139,7 @@ static void block_job_resume(BlockJob *job)
    block_job_enter(job);
 }

-static void block_job_ref(BlockJob *job)
+void block_job_ref(BlockJob *job)
 {
    ++job->refcnt;
 }
@@ -148,7 +148,7 @@ static void block_job_attached_aio_context(AioContext *new_context,
                                           void *opaque);
 static void block_job_detach_aio_context(void *opaque);

-static void block_job_unref(BlockJob *job)
+void block_job_unref(BlockJob *job)
 {
    if (--job->refcnt == 0) {
        BlockDriverState *bs = blk_bs(job->blk);
--- a/bootdevice.c
+++ b/bootdevice.c
@@ -27,7 +27,7 @@
 #include "sysemu/sysemu.h"
 #include "qapi/visitor.h"
 #include "qemu/error-report.h"
-#include "hw/hw.h"
+#include "sysemu/reset.h"
 #include "hw/qdev-core.h"

 typedef struct FWBootEntry FWBootEntry;
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -25,7 +25,6 @@
 #include "qemu/config-file.h"
 #include "qemu/path.h"
 #include "qemu/help_option.h"
-/* For tb_lock */
 #include "cpu.h"
 #include "exec/exec-all.h"
 #include "tcg.h"
--- a/bsd-user/qemu.h
+++ b/bsd-user/qemu.h
@@ -85,6 +85,8 @@ struct emulated_sigtable {
 /* NOTE: we force a big alignment so that the stack stored after is
   aligned too */
 typedef struct TaskState {
+    pid_t ts_tid;     /* tid (or pid) of this task */
+
    struct TaskState *next;
    int used; /* non zero if used */
    struct image_info *info;
--- a/chardev/char-fe.c
+++ b/chardev/char-fe.c
@@ -179,9 +179,21 @@ void qemu_chr_fe_printf(CharBackend *be, const char *fmt, ...)

 Chardev *qemu_chr_fe_get_driver(CharBackend *be)
 {
+    /* this is unsafe for the users that support chardev hotswap */
+    assert(be->chr_be_change == NULL);
    return be->chr;
 }

+bool qemu_chr_fe_backend_connected(CharBackend *be)
+{
+    return !!be->chr;
+}
+
+bool qemu_chr_fe_backend_open(CharBackend *be)
+{
+    return be->chr && be->chr->be_open;
+}
+
 bool qemu_chr_fe_init(CharBackend *b, Chardev *s, Error **errp)
 {
    int tag = 0;
@@ -216,7 +228,7 @@ void qemu_chr_fe_deinit(CharBackend *b, bool del)
    assert(b);

    if (b->chr) {
-        qemu_chr_fe_set_handlers(b, NULL, NULL, NULL, NULL, NULL, true);
+        qemu_chr_fe_set_handlers(b, NULL, NULL, NULL, NULL, NULL, NULL, true);
        if (b->chr->be == b) {
            b->chr->be = NULL;
        }
@@ -235,6 +247,7 @@ void qemu_chr_fe_set_handlers(CharBackend *b,
                              IOCanReadHandler *fd_can_read,
                              IOReadHandler *fd_read,
                              IOEventHandler *fd_event,
+                              BackendChangeHandler *be_change,
                              void *opaque,
                              GMainContext *context,
                              bool set_open)
@@ -258,6 +271,7 @@ void qemu_chr_fe_set_handlers(CharBackend *b,
    b->chr_can_read = fd_can_read;
    b->chr_read = fd_read;
    b->chr_event = fd_event;
+    b->chr_be_change = be_change;
    b->opaque = opaque;
    if (cc->chr_update_read_handler) {
        cc->chr_update_read_handler(s, context);
--- a/chardev/char-mux.c
+++ b/chardev/char-mux.c
@@ -278,6 +278,7 @@ void mux_chr_set_handlers(Chardev *chr, GMainContext *context)
                             mux_chr_can_read,
                             mux_chr_read,
                             mux_chr_event,
+                             NULL,
                             chr,
                             context, true);
 }
--- a/chardev/char-socket.c
+++ b/chardev/char-socket.c
@@ -454,7 +454,9 @@ static int tcp_chr_sync_read(Chardev *chr, const uint8_t *buf, int len)
        return 0;
    }

+    qio_channel_set_blocking(s->ioc, true, NULL);
    size = tcp_chr_recv(chr, (void *) buf, len);
+    qio_channel_set_blocking(s->ioc, false, NULL);
    if (size == 0) {
        /* connection closed */
        tcp_chr_disconnect(chr);
@@ -765,8 +767,8 @@ static int tcp_chr_wait_connected(Chardev *chr, Error **errp)
     * in TLS and telnet cases, only wait for an accepted socket */
    while (!s->ioc) {
        if (s->is_listen) {
-            error_report("QEMU waiting for connection on: %s",
-                         chr->filename);
+            info_report("QEMU waiting for connection on: %s",
+                        chr->filename);
            qio_channel_set_blocking(QIO_CHANNEL(s->listen_ioc), true, NULL);
            tcp_chr_accept(QIO_CHANNEL(s->listen_ioc), G_IO_IN, chr);
            qio_channel_set_blocking(QIO_CHANNEL(s->listen_ioc), false, NULL);
--- a/chardev/char.c
+++ b/chardev/char.c
@@ -556,17 +556,23 @@ help_string_append(const char *name, void *opaque)
    g_string_append_printf(str, "\n%s", name);
 }

-Chardev *qemu_chr_new_from_opts(QemuOpts *opts,
-                                Error **errp)
+static const char *chardev_alias_translate(const char *name)
+{
+    int i;
+    for (i = 0; i < (int)ARRAY_SIZE(chardev_alias_table); i++) {
+        if (g_strcmp0(chardev_alias_table[i].alias, name) == 0) {
+            return chardev_alias_table[i].typename;
+        }
+    }
+    return name;
+}
+
+ChardevBackend *qemu_chr_parse_opts(QemuOpts *opts, Error **errp)
 {
    Error *local_err = NULL;
    const ChardevClass *cc;
-    Chardev *chr;
-    int i;
    ChardevBackend *backend = NULL;
-    const char *name = qemu_opt_get(opts, "backend");
-    const char *id = qemu_opts_id(opts);
-    char *bid = NULL;
+    const char *name = chardev_alias_translate(qemu_opt_get(opts, "backend"));

    if (name == NULL) {
        error_setg(errp, "chardev: \"%s\" missing backend",
@@ -574,7 +580,40 @@ Chardev *qemu_chr_new_from_opts(QemuOpts *opts,
        return NULL;
    }

-    if (is_help_option(name)) {
+    cc = char_get_class(name, errp);
+    if (cc == NULL) {
+        return NULL;
+    }
+
+    backend = g_new0(ChardevBackend, 1);
+    backend->type = CHARDEV_BACKEND_KIND_NULL;
+
+    if (cc->parse) {
+        cc->parse(opts, backend, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            qapi_free_ChardevBackend(backend);
+            return NULL;
+        }
+    } else {
+        ChardevCommon *ccom = g_new0(ChardevCommon, 1);
+        qemu_chr_parse_common(opts, ccom);
+        backend->u.null.data = ccom; /* Any ChardevCommon member would work */
+    }
+
+    return backend;
+}
+
+Chardev *qemu_chr_new_from_opts(QemuOpts *opts, Error **errp)
+{
+    const ChardevClass *cc;
+    Chardev *chr = NULL;
+    ChardevBackend *backend = NULL;
+    const char *name = chardev_alias_translate(qemu_opt_get(opts, "backend"));
+    const char *id = qemu_opts_id(opts);
+    char *bid = NULL;
+
+    if (name && is_help_option(name)) {
        GString *str = g_string_new("");

        chardev_name_foreach(help_string_append, str);
@@ -589,38 +628,20 @@ Chardev *qemu_chr_new_from_opts(QemuOpts *opts,
        return NULL;
    }

-    for (i = 0; i < (int)ARRAY_SIZE(chardev_alias_table); i++) {
-        if (g_strcmp0(chardev_alias_table[i].alias, name) == 0) {
-            name = chardev_alias_table[i].typename;
-            break;
-        }
+    backend = qemu_chr_parse_opts(opts, errp);
+    if (backend == NULL) {
+        return NULL;
    }

    cc = char_get_class(name, errp);
    if (cc == NULL) {
-        return NULL;
+        goto out;
    }

-    backend = g_new0(ChardevBackend, 1);
-    backend->type = CHARDEV_BACKEND_KIND_NULL;
-
    if (qemu_opt_get_bool(opts, "mux", 0)) {
        bid = g_strdup_printf("%s-base", id);
    }

-    chr = NULL;
-    if (cc->parse) {
-        cc->parse(opts, backend, &local_err);
-        if (local_err) {
-            error_propagate(errp, local_err);
-            goto out;
-        }
-    } else {
-        ChardevCommon *ccom = g_new0(ChardevCommon, 1);
-        qemu_chr_parse_common(opts, ccom);
-        backend->u.null.data = ccom; /* Any ChardevCommon member would work */
-    }
-
    chr = qemu_chardev_new(bid ? bid : id,
                           object_class_get_name(OBJECT_CLASS(cc)),
                           backend, errp);
@@ -930,6 +951,89 @@ ChardevReturn *qmp_chardev_add(const char *id, ChardevBackend *backend,
    return ret;
 }

+ChardevReturn *qmp_chardev_change(const char *id, ChardevBackend *backend,
+                                  Error **errp)
+{
+    CharBackend *be;
+    const ChardevClass *cc;
+    Chardev *chr, *chr_new;
+    bool closed_sent = false;
+    ChardevReturn *ret;
+
+    chr = qemu_chr_find(id);
+    if (!chr) {
+        error_setg(errp, "Chardev '%s' does not exist", id);
+        return NULL;
+    }
+
+    if (CHARDEV_IS_MUX(chr)) {
+        error_setg(errp, "Mux device hotswap not supported yet");
+        return NULL;
+    }
+
+    if (qemu_chr_replay(chr)) {
+        error_setg(errp,
+            "Chardev '%s' cannot be changed in record/replay mode", id);
+        return NULL;
+    }
+
+    be = chr->be;
+    if (!be) {
+        /* easy case */
+        object_unparent(OBJECT(chr));
+        return qmp_chardev_add(id, backend, errp);
+    }
+
+    if (!be->chr_be_change) {
+        error_setg(errp, "Chardev user does not support chardev hotswap");
+        return NULL;
+    }
+
+    cc = char_get_class(ChardevBackendKind_lookup[backend->type], errp);
+    if (!cc) {
+        return NULL;
+    }
+
+    chr_new = qemu_chardev_new(NULL, object_class_get_name(OBJECT_CLASS(cc)),
+                               backend, errp);
+    if (!chr_new) {
+        return NULL;
+    }
+    chr_new->label = g_strdup(id);
+
+    if (chr->be_open && !chr_new->be_open) {
+        qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
+        closed_sent = true;
+    }
+
+    chr->be = NULL;
+    qemu_chr_fe_init(be, chr_new, &error_abort);
+
+    if (be->chr_be_change(be->opaque) < 0) {
+        error_setg(errp, "Chardev '%s' change failed", chr_new->label);
+        chr_new->be = NULL;
+        qemu_chr_fe_init(be, chr, &error_abort);
+        if (closed_sent) {
+            qemu_chr_be_event(chr, CHR_EVENT_OPENED);
+        }
+        object_unref(OBJECT(chr_new));
+        return NULL;
+    }
+
+    object_unparent(OBJECT(chr));
+    object_property_add_child(get_chardevs_root(), chr_new->label,
+                              OBJECT(chr_new), &error_abort);
+    object_unref(OBJECT(chr_new));
+
+    ret = g_new0(ChardevReturn, 1);
+    if (CHARDEV_IS_PTY(chr_new)) {
+        ret->pty = g_strdup(chr_new->filename + 4);
+        ret->has_pty = true;
+    }
+
+    return ret;
+}
+
 void qmp_chardev_remove(const char *id, Error **errp)
 {
    Chardev *chr;
@@ -951,6 +1055,18 @@ void qmp_chardev_remove(const char *id, Error **errp)
    object_unparent(OBJECT(chr));
 }

+void qmp_chardev_send_break(const char *id, Error **errp)
+{
+    Chardev *chr;
+
+    chr = qemu_chr_find(id);
+    if (chr == NULL) {
+        error_setg(errp, "Chardev '%s' not found", id);
+        return;
+    }
+    qemu_chr_be_event(chr, CHR_EVENT_BREAK);
+}
+
 void qemu_chr_cleanup(void)
 {
    object_unparent(get_chardevs_root());
--- a/274
+++ b/274
@@ -40,14 +40,18 @@ printf " '%s'" "$0" "$@" >> config.log
 echo >> config.log
 echo "#" >> config.log

-error_exit() {
-    echo
+print_error() {
+    (echo
    echo "ERROR: $1"
    while test -n "$2"; do
        echo "       $2"
        shift
    done
-    echo
+    echo) >&2
+}
+
+error_exit() {
+    print_error "$@"
    exit 1
 }

@@ -163,6 +167,79 @@ have_backend () {
    echo "$trace_backends" | grep "$1" >/dev/null
 }

+glob() {
+    eval test -z '"${1#'"$2"'}"'
+}
+
+supported_hax_target() {
+    test "$hax" = "yes" || return 1
+    glob "$1" "*-softmmu" || return 1
+    case "${1%-softmmu}" in
+        i386|x86_64)
+            return 0
+        ;;
+    esac
+    return 1
+}
+
+supported_kvm_target() {
+    test "$kvm" = "yes" || return 1
+    glob "$1" "*-softmmu" || return 1
+    case "${1%-softmmu}:$cpu" in
+        arm:arm | aarch64:aarch64 | \
+        i386:i386 | i386:x86_64 | i386:x32 | \
+        x86_64:i386 | x86_64:x86_64 | x86_64:x32 | \
+        mips:mips | mipsel:mips | \
+        ppc:ppc | ppcemb:ppc | ppc64:ppc | \
+        ppc:ppc64 | ppcemb:ppc64 | ppc64:ppc64 | \
+        s390x:s390x)
+            return 0
+        ;;
+    esac
+    return 1
+}
+
+supported_xen_target() {
+    test "$xen" = "yes" || return 1
+    glob "$1" "*-softmmu" || return 1
+    # Only i386 and x86_64 provide the xenpv machine.
+    case "${1%-softmmu}" in
+        i386|x86_64)
+            return 0
+        ;;
+    esac
+    return 1
+}
+
+supported_target() {
+    case "$1" in
+        *-softmmu)
+            ;;
+        *-linux-user)
+            if test "$linux" != "yes"; then
+                print_error "Target '$target' is only available on a Linux host"
+                return 1
+            fi
+            ;;
+        *-bsd-user)
+            if test "$bsd" != "yes"; then
+                print_error "Target '$target' is only available on a BSD host"
+                return 1
+            fi
+            ;;
+        *)
+            print_error "Invalid target name '$target'"
+            return 1
+            ;;
+    esac
+    test "$tcg" = "yes" && return 0
+    supported_kvm_target "$1" && return 0
+    supported_xen_target "$1" && return 0
+    supported_hax_target "$1" && return 0
+    print_error "TCG disabled, but hardware accelerator not available for '$target'"
+    return 1
+}
+
 # default parameters
 source_path=$(dirname "$0")
 cpu=""
@@ -224,6 +301,7 @@ cap_ng=""
 attr=""
 libattr=""
 xfs=""
+tcg="yes"

 vhost_net="no"
 vhost_scsi="no"
@@ -297,6 +375,7 @@ libnfs=""
 coroutine=""
 coroutine_pool=""
 debug_stack_usage="no"
+crypto_afalg="no"
 seccomp=""
 glusterfs=""
 glusterfs_xlator_opt="no"
@@ -961,6 +1040,10 @@ for opt do
  ;;
  --enable-cap-ng) cap_ng="yes"
  ;;
+  --disable-tcg) tcg="no"
+  ;;
+  --enable-tcg) tcg="yes"
+  ;;
  --disable-spice) spice="no"
  ;;
  --enable-spice) spice="yes"
@@ -1042,6 +1125,10 @@ for opt do
  ;;
  --enable-debug-stack-usage) debug_stack_usage="yes"
  ;;
+  --enable-crypto-afalg) crypto_afalg="yes"
+  ;;
+  --disable-crypto-afalg) crypto_afalg="no"
+  ;;
  --disable-docs) docs="no"
  ;;
  --enable-docs) docs="yes"
@@ -1436,6 +1523,7 @@ disabled with --disable-FEATURE, default is enabled if available:
  qom-cast-debug  cast debugging support
  tools           build qemu-io, qemu-nbd and qemu-image tools
  vxhs            Veritas HyperScale vDisk backend support
+  crypto-afalg    Linux AF_ALG crypto backend driver

 NOTE: The object files are built at the place where configure is launched
 EOF
@@ -1501,7 +1589,7 @@ gcc_flags="-Wold-style-declaration -Wold-style-definition -Wtype-limits"
 gcc_flags="-Wformat-security -Wformat-y2k -Winit-self -Wignored-qualifiers $gcc_flags"
 gcc_flags="-Wno-missing-include-dirs -Wempty-body -Wnested-externs $gcc_flags"
 gcc_flags="-Wendif-labels -Wno-shift-negative-value $gcc_flags"
-gcc_flags="-Wno-initializer-overrides $gcc_flags"
+gcc_flags="-Wno-initializer-overrides -Wexpansion-to-defined $gcc_flags"
 gcc_flags="-Wno-string-plus-int $gcc_flags"
 # Note that we do not add -Werror to gcc_flags here, because that would
 # enable it for all configure tests. If a configure test failed due
@@ -1690,23 +1778,27 @@ if test "$solaris" = "yes" ; then
 fi

 if test -z "${target_list+xxx}" ; then
-    target_list="$default_target_list"
+    for target in $default_target_list; do
+        supported_target $target 2>/dev/null && \
+            target_list="$target_list $target"
+    done
+    target_list="${target_list# }"
 else
    target_list=$(echo "$target_list" | sed -e 's/,/ /g')
+    for target in $target_list; do
+        # Check that we recognised the target name; this allows a more
+        # friendly error message than if we let it fall through.
+        case " $default_target_list " in
+            *" $target "*)
+                ;;
+            *)
+                error_exit "Unknown target name '$target'"
+                ;;
+        esac
+        supported_target $target || exit 1
+    done
 fi

-# Check that we recognised the target name; this allows a more
-# friendly error message than if we let it fall through.
-for target in $target_list; do
-    case " $default_target_list " in
-        *" $target "*)
-            ;;
-        *)
-            error_exit "Unknown target name '$target'"
-            ;;
-    esac
-done
-
 # see if system emulation was really requested
 case " $target_list " in
  *"-softmmu "*) softmmu=yes
@@ -2021,6 +2113,24 @@ EOF
    # Xen unstable
    elif
        cat > $TMPC <<EOF &&
+#undef XC_WANT_COMPAT_MAP_FOREIGN_API
+#include <xenforeignmemory.h>
+int main(void) {
+  xenforeignmemory_handle *xfmem;
+
+  xfmem = xenforeignmemory_open(0, 0);
+  xenforeignmemory_map2(xfmem, 0, 0, 0, 0, 0, 0, 0);
+
+  return 0;
+}
+EOF
+        compile_prog "" "$xen_libs -lxendevicemodel $xen_stable_libs"
+      then
+      xen_stable_libs="-lxendevicemodel $xen_stable_libs"
+      xen_ctrl_version=41000
+      xen=yes
+    elif
+        cat > $TMPC <<EOF &&
 #undef XC_WANT_COMPAT_DEVICEMODEL_API
 #define __XEN_TOOLS__
 #include <xendevicemodel.h>
@@ -2647,7 +2757,11 @@ int main( void ) { return SDL_Init (SDL_INIT_VIDEO); }
 EOF
  sdl_cflags=$($sdlconfig --cflags 2>/dev/null)
  if test "$static" = "yes" ; then
-    sdl_libs=$($sdlconfig --static-libs 2>/dev/null)
+    if $pkg_config $sdlname --exists; then
+      sdl_libs=$($pkg_config $sdlname --static --libs 2>/dev/null)
+    else
+      sdl_libs=$($sdlconfig --static-libs 2>/dev/null)
+    fi
  else
    sdl_libs=$($sdlconfig --libs 2>/dev/null)
  fi
@@ -3029,6 +3143,8 @@ int main(void) {
 EOF
  IFS=:
  for curses_inc in $curses_inc_list; do
+    # Make sure we get the wide character prototypes
+    curses_inc="-DNCURSES_WIDECHAR $curses_inc"
    IFS=:
    for curses_lib in $curses_lib_list; do
      unset IFS
@@ -4679,7 +4795,7 @@ fi
 if test "$fortify_source" != "no"; then
  if echo | $cc -dM -E - | grep __clang__ > /dev/null 2>&1 ; then
    fortify_source="no";
-  elif test -n "$cxx" &&
+  elif test -n "$cxx" && has $cxx &&
       echo | $cxx -dM -E - | grep __clang__ >/dev/null 2>&1 ; then
    fortify_source="no";
  else
@@ -4742,6 +4858,32 @@ if compile_prog "" "" ; then
    have_af_vsock=yes
 fi

+##########################################
+# check for usable AF_ALG environment
+hava_afalg=no
+cat > $TMPC << EOF
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <linux/if_alg.h>
+int main(void) {
+    int sock;
+    sock = socket(AF_ALG, SOCK_SEQPACKET, 0);
+    return sock;
+}
+EOF
+if compile_prog "" "" ; then
+    have_afalg=yes
+fi
+if test "$crypto_afalg" = "yes"
+then
+    if test "$have_afalg" != "yes"
+    then
+	error_exit "AF_ALG requested but could not be detected"
+    fi
+fi
+
+
 #################################################
 # Sparc implicitly links with --relax, which is
 # incompatible with -r, so --no-relax should be
@@ -4822,6 +4964,21 @@ if compile_prog "" "" ; then
    have_static_assert=yes
 fi

+##########################################
+# check for utmpx.h, it is missing e.g. on OpenBSD
+
+have_utmpx=no
+cat > $TMPC << EOF
+#include <utmpx.h>
+struct utmpx user_info;
+int main(void) {
+    return 0;
+}
+EOF
+if compile_prog "" "" ; then
+    have_utmpx=yes
+fi
+
 ##########################################
 # End of CC checks
 # After here, no more $cc or $ld runs
@@ -5117,7 +5274,6 @@ echo "module support    $modules"
 echo "host CPU          $cpu"
 echo "host big endian   $bigendian"
 echo "target list       $target_list"
-echo "tcg debug enabled $debug_tcg"
 echo "gprof enabled     $gprof"
 echo "sparse enabled    $sparse"
 echo "strip binaries    $strip_opt"
@@ -5172,8 +5328,12 @@ echo "ATTR/XATTR support $attr"
 echo "Install blobs     $blobs"
 echo "KVM support       $kvm"
 echo "HAX support       $hax"
+echo "TCG support       $tcg"
+if test "$tcg" = "yes" ; then
+    echo "TCG debug enabled $debug_tcg"
+    echo "TCG interpreter   $tcg_interpreter"
+fi
 echo "RDMA support      $rdma"
-echo "TCG interpreter   $tcg_interpreter"
 echo "fdt support       $fdt"
 echo "preadv support    $preadv"
 echo "fdatasync         $fdatasync"
@@ -5205,6 +5365,7 @@ echo "seccomp support   $seccomp"
 echo "coroutine backend $coroutine"
 echo "coroutine pool    $coroutine_pool"
 echo "debug stack usage $debug_stack_usage"
+echo "crypto afalg      $crypto_afalg"
 echo "GlusterFS support $glusterfs"
 echo "gcov              $gcov_tool"
 echo "gcov enabled      $gcov"
@@ -5616,8 +5777,11 @@ fi
 if test "$signalfd" = "yes" ; then
  echo "CONFIG_SIGNALFD=y" >> $config_host_mak
 fi
-if test "$tcg_interpreter" = "yes" ; then
-  echo "CONFIG_TCG_INTERPRETER=y" >> $config_host_mak
+if test "$tcg" = "yes"; then
+  echo "CONFIG_TCG=y" >> $config_host_mak
+  if test "$tcg_interpreter" = "yes" ; then
+    echo "CONFIG_TCG_INTERPRETER=y" >> $config_host_mak
+  fi
 fi
 if test "$fdatasync" = "yes" ; then
  echo "CONFIG_FDATASYNC=y" >> $config_host_mak
@@ -5713,6 +5877,10 @@ if test "$debug_stack_usage" = "yes" ; then
  echo "CONFIG_DEBUG_STACK_USAGE=y" >> $config_host_mak
 fi

+if test "$crypto_afalg" = "yes" ; then
+  echo "CONFIG_AF_ALG=y" >> $config_host_mak
+fi
+
 if test "$open_by_handle_at" = "yes" ; then
  echo "CONFIG_OPEN_BY_HANDLE=y" >> $config_host_mak
 fi
@@ -5861,6 +6029,10 @@ if test "$have_static_assert" = "yes" ; then
  echo "CONFIG_STATIC_ASSERT=y" >> $config_host_mak
 fi

+if test "$have_utmpx" = "yes" ; then
+  echo "HAVE_UTMPX=y" >> $config_host_mak
+fi
+
 # Hold two types of flag:
 #   CONFIG_THREAD_SETNAME_BYTHREAD  - we've got a way of setting the name on
 #                                     a thread we have a handle to
@@ -6004,16 +6176,10 @@ case "$target" in
    target_softmmu="yes"
    ;;
  ${target_name}-linux-user)
-    if test "$linux" != "yes" ; then
-      error_exit "Target '$target' is only available on a Linux host"
-    fi
    target_user_only="yes"
    target_linux_user="yes"
    ;;
  ${target_name}-bsd-user)
-    if test "$bsd" != "yes" ; then
-      error_exit "Target '$target' is only available on a BSD host"
-    fi
    target_user_only="yes"
    target_bsd_user="yes"
    ;;
@@ -6066,7 +6232,7 @@ case "$target_name" in
  ;;
  m68k)
    bflt="yes"
-    gdb_xml_files="cf-core.xml cf-fp.xml"
+    gdb_xml_files="cf-core.xml cf-fp.xml m68k-fp.xml"
  ;;
  microblaze|microblazeel)
    TARGET_ARCH=microblaze
@@ -6139,7 +6305,7 @@ case "$target_name" in
    echo "TARGET_ABI32=y" >> $config_target_mak
  ;;
  s390x)
-    gdb_xml_files="s390x-core64.xml s390-acr.xml s390-fpr.xml s390-vx.xml s390-cr.xml s390-virt.xml"
+    gdb_xml_files="s390x-core64.xml s390-acr.xml s390-fpr.xml s390-vx.xml s390-cr.xml s390-virt.xml s390-gs.xml"
  ;;
  tilegx)
  ;;
@@ -6176,46 +6342,22 @@ echo "TARGET_ABI_DIR=$TARGET_ABI_DIR" >> $config_target_mak
 if [ "$HOST_VARIANT_DIR" != "" ]; then
    echo "HOST_VARIANT_DIR=$HOST_VARIANT_DIR" >> $config_target_mak
 fi
-case "$target_name" in
-  i386|x86_64)
-    if test "$xen" = "yes" -a "$target_softmmu" = "yes" ; then
-      echo "CONFIG_XEN=y" >> $config_target_mak
-      if test "$xen_pci_passthrough" = yes; then
+
+if supported_xen_target $target; then
+    echo "CONFIG_XEN=y" >> $config_target_mak
+    if test "$xen_pci_passthrough" = yes; then
        echo "CONFIG_XEN_PCI_PASSTHROUGH=y" >> "$config_target_mak"
-      fi
    fi
-    ;;
-  *)
-esac
-case "$target_name" in
-  aarch64|arm|i386|x86_64|ppcemb|ppc|ppc64|s390x|mipsel|mips)
-    # Make sure the target and host cpus are compatible
-    if test "$kvm" = "yes" -a "$target_softmmu" = "yes" -a \
-      \( "$target_name" = "$cpu" -o \
-      \( "$target_name" = "ppcemb" -a "$cpu" = "ppc" \) -o \
-      \( "$target_name" = "ppc64"  -a "$cpu" = "ppc" \) -o \
-      \( "$target_name" = "ppc"    -a "$cpu" = "ppc64" \) -o \
-      \( "$target_name" = "ppcemb" -a "$cpu" = "ppc64" \) -o \
-      \( "$target_name" = "mipsel" -a "$cpu" = "mips" \) -o \
-      \( "$target_name" = "x86_64" -a "$cpu" = "i386"   \) -o \
-      \( "$target_name" = "i386"   -a "$cpu" = "x86_64" \) -o \
-      \( "$target_name" = "x86_64" -a "$cpu" = "x32"   \) -o \
-      \( "$target_name" = "i386"   -a "$cpu" = "x32" \) \) ; then
-      echo "CONFIG_KVM=y" >> $config_target_mak
-      if test "$vhost_net" = "yes" ; then
+fi
+if supported_kvm_target $target; then
+    echo "CONFIG_KVM=y" >> $config_target_mak
+    if test "$vhost_net" = "yes" ; then
        echo "CONFIG_VHOST_NET=y" >> $config_target_mak
        echo "CONFIG_VHOST_NET_TEST_$target_name=y" >> $config_host_mak
-      fi
    fi
-esac
-if test "$hax" = "yes" ; then
-  if test "$target_softmmu" = "yes" ; then
-    case "$target_name" in
-    i386|x86_64)
-      echo "CONFIG_HAX=y" >> $config_target_mak
-    ;;
-    esac
-  fi
+fi
+if supported_hax_target $target; then
+    echo "CONFIG_HAX=y" >> $config_target_mak
 fi
 if test "$target_bigendian" = "yes" ; then
  echo "TARGET_WORDS_BIGENDIAN=y" >> $config_target_mak
--- a/cpus.c
+++ b/cpus.c
@@ -557,7 +557,7 @@ void qemu_start_warp_timer(void)
    if (deadline < 0) {
        static bool notified;
        if (!icount_sleep && !notified) {
-            error_report("WARNING: icount sleep disabled and no active timers");
+            warn_report("icount sleep disabled and no active timers");
            notified = true;
        }
        return;
--- a/crypto/Makefile.objs
+++ b/crypto/Makefile.objs
@@ -10,6 +10,9 @@ crypto-obj-$(if $(CONFIG_NETTLE),n,$(if $(CONFIG_GCRYPT_HMAC),n,y)) += hmac-glib
 crypto-obj-y += aes.o
 crypto-obj-y += desrfb.o
 crypto-obj-y += cipher.o
+crypto-obj-$(CONFIG_AF_ALG) += afalg.o
+crypto-obj-$(CONFIG_AF_ALG) += cipher-afalg.o
+crypto-obj-$(CONFIG_AF_ALG) += hash-afalg.o
 crypto-obj-y += tlscreds.o
 crypto-obj-y += tlscredsanon.o
 crypto-obj-y += tlscredsx509.o
--- a/crypto/afalg.c
+++ b/crypto/afalg.c
@@ -0,0 +1,116 @@
+/*
+ * QEMU Crypto af_alg support
+ *
+ * Copyright (c) 2017 HUAWEI TECHNOLOGIES CO., LTD.
+ *
+ * Authors:
+ *    Longpeng(Mike) <longpeng2@huawei.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
+#include "qemu/sockets.h"
+#include "qapi/error.h"
+#include "afalgpriv.h"
+
+static bool
+qcrypto_afalg_build_saddr(const char *type, const char *name,
+                          struct sockaddr_alg *salg, Error **errp)
+{
+    salg->salg_family = AF_ALG;
+
+    if (strnlen(type, SALG_TYPE_LEN_MAX) >= SALG_TYPE_LEN_MAX) {
+        error_setg(errp, "Afalg type(%s) is larger than %d bytes",
+                   type, SALG_TYPE_LEN_MAX);
+        return false;
+    }
+
+    if (strnlen(name, SALG_NAME_LEN_MAX) >= SALG_NAME_LEN_MAX) {
+        error_setg(errp, "Afalg name(%s) is larger than %d bytes",
+                   name, SALG_NAME_LEN_MAX);
+        return false;
+    }
+
+    pstrcpy((char *)salg->salg_type, SALG_TYPE_LEN_MAX, type);
+    pstrcpy((char *)salg->salg_name, SALG_NAME_LEN_MAX, name);
+
+    return true;
+}
+
+static int
+qcrypto_afalg_socket_bind(const char *type, const char *name,
+                          Error **errp)
+{
+    int sbind;
+    struct sockaddr_alg salg = {0};
+
+    if (!qcrypto_afalg_build_saddr(type, name, &salg, errp)) {
+        return -1;
+    }
+
+    sbind = qemu_socket(AF_ALG, SOCK_SEQPACKET, 0);
+    if (sbind < 0) {
+        error_setg_errno(errp, errno, "Failed to create socket");
+        return -1;
+    }
+
+    if (bind(sbind, (const struct sockaddr *)&salg, sizeof(salg)) != 0) {
+        error_setg_errno(errp, errno, "Failed to bind socket");
+        closesocket(sbind);
+        return -1;
+    }
+
+    return sbind;
+}
+
+QCryptoAFAlg *
+qcrypto_afalg_comm_alloc(const char *type, const char *name,
+                         Error **errp)
+{
+    QCryptoAFAlg *afalg;
+
+    afalg = g_new0(QCryptoAFAlg, 1);
+    /* initilize crypto API socket */
+    afalg->opfd = -1;
+    afalg->tfmfd = qcrypto_afalg_socket_bind(type, name, errp);
+    if (afalg->tfmfd == -1) {
+        goto error;
+    }
+
+    afalg->opfd = qemu_accept(afalg->tfmfd, NULL, 0);
+    if (afalg->opfd == -1) {
+        error_setg_errno(errp, errno, "Failed to accept socket");
+        goto error;
+    }
+
+    return afalg;
+
+error:
+    qcrypto_afalg_comm_free(afalg);
+    return NULL;
+}
+
+void qcrypto_afalg_comm_free(QCryptoAFAlg *afalg)
+{
+    if (!afalg) {
+        return;
+    }
+
+    if (afalg->msg) {
+        g_free(afalg->msg->msg_control);
+        g_free(afalg->msg);
+    }
+
+    if (afalg->tfmfd != -1) {
+        closesocket(afalg->tfmfd);
+    }
+
+    if (afalg->opfd != -1) {
+        closesocket(afalg->opfd);
+    }
+
+    g_free(afalg);
+}
--- a/crypto/afalgpriv.h
+++ b/crypto/afalgpriv.h
@@ -0,0 +1,64 @@
+/*
+ * QEMU Crypto af_alg support
+ *
+ * Copyright (c) 2017 HUAWEI TECHNOLOGIES CO., LTD.
+ *
+ * Authors:
+ *    Longpeng(Mike) <longpeng2@huawei.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+
+#ifndef QCRYPTO_AFALGPRIV_H
+#define QCRYPTO_AFALGPRIV_H
+
+#include <linux/if_alg.h>
+
+#define SALG_TYPE_LEN_MAX 14
+#define SALG_NAME_LEN_MAX 64
+
+#ifndef SOL_ALG
+#define SOL_ALG 279
+#endif
+
+#define AFALG_TYPE_CIPHER "skcipher"
+#define AFALG_TYPE_HASH "hash"
+
+#define ALG_OPTYPE_LEN 4
+#define ALG_MSGIV_LEN(len) (sizeof(struct af_alg_iv) + (len))
+
+typedef struct QCryptoAFAlg QCryptoAFAlg;
+
+struct QCryptoAFAlg {
+    int tfmfd;
+    int opfd;
+    struct msghdr *msg;
+    struct cmsghdr *cmsg;
+};
+
+/**
+ * qcrypto_afalg_comm_alloc:
+ * @type: the type of crypto operation
+ * @name: the name of crypto operation
+ *
+ * Allocate a QCryptoAFAlg object and bind itself to
+ * a AF_ALG socket.
+ *
+ * Returns:
+ *  a new QCryptoAFAlg object, or NULL in error.
+ */
+QCryptoAFAlg *
+qcrypto_afalg_comm_alloc(const char *type, const char *name,
+                         Error **errp);
+
+/**
+ * afalg_comm_free:
+ * @afalg: the QCryptoAFAlg object
+ *
+ * Free the @afalg.
+ */
+void qcrypto_afalg_comm_free(QCryptoAFAlg *afalg);
+
+#endif
--- a/crypto/block-luks.c
+++ b/crypto/block-luks.c
@@ -638,6 +638,7 @@ qcrypto_block_luks_find_key(QCryptoBlock *block,
 static int
 qcrypto_block_luks_open(QCryptoBlock *block,
                        QCryptoBlockOpenOptions *options,
+                        const char *optprefix,
                        QCryptoBlockReadFunc readfunc,
                        void *opaque,
                        unsigned int flags,
@@ -661,7 +662,8 @@ qcrypto_block_luks_open(QCryptoBlock *block,

    if (!(flags & QCRYPTO_BLOCK_OPEN_NO_IO)) {
        if (!options->u.luks.key_secret) {
-            error_setg(errp, "Parameter 'key-secret' is required for cipher");
+            error_setg(errp, "Parameter '%skey-secret' is required for cipher",
+                       optprefix ? optprefix : "");
            return -1;
        }
        password = qcrypto_secret_lookup_as_utf8(
@@ -885,6 +887,7 @@ qcrypto_block_luks_uuid_gen(uint8_t *uuidstr)
 static int
 qcrypto_block_luks_create(QCryptoBlock *block,
                          QCryptoBlockCreateOptions *options,
+                          const char *optprefix,
                          QCryptoBlockInitFunc initfunc,
                          QCryptoBlockWriteFunc writefunc,
                          void *opaque,
@@ -937,7 +940,8 @@ qcrypto_block_luks_create(QCryptoBlock *block,
     * be silently ignored, for compatibility with dm-crypt */

    if (!options->u.luks.key_secret) {
-        error_setg(errp, "Parameter 'key-secret' is required for cipher");
+        error_setg(errp, "Parameter '%skey-secret' is required for cipher",
+                   optprefix ? optprefix : "");
        return -1;
    }
    password = qcrypto_secret_lookup_as_utf8(luks_opts.key_secret, errp);
--- a/crypto/block-qcow.c
+++ b/crypto/block-qcow.c
@@ -94,6 +94,7 @@ qcrypto_block_qcow_init(QCryptoBlock *block,
 static int
 qcrypto_block_qcow_open(QCryptoBlock *block,
                        QCryptoBlockOpenOptions *options,
+                        const char *optprefix,
                        QCryptoBlockReadFunc readfunc G_GNUC_UNUSED,
                        void *opaque G_GNUC_UNUSED,
                        unsigned int flags,
@@ -104,7 +105,8 @@ qcrypto_block_qcow_open(QCryptoBlock *block,
    } else {
        if (!options->u.qcow.key_secret) {
            error_setg(errp,
-                       "Parameter 'key-secret' is required for cipher");
+                       "Parameter '%skey-secret' is required for cipher",
+                       optprefix ? optprefix : "");
            return -1;
        }
        return qcrypto_block_qcow_init(block,
@@ -116,13 +118,15 @@ qcrypto_block_qcow_open(QCryptoBlock *block,
 static int
 qcrypto_block_qcow_create(QCryptoBlock *block,
                          QCryptoBlockCreateOptions *options,
+                          const char *optprefix,
                          QCryptoBlockInitFunc initfunc G_GNUC_UNUSED,
                          QCryptoBlockWriteFunc writefunc G_GNUC_UNUSED,
                          void *opaque G_GNUC_UNUSED,
                          Error **errp)
 {
    if (!options->u.qcow.key_secret) {
-        error_setg(errp, "Parameter 'key-secret' is required for cipher");
+        error_setg(errp, "Parameter '%skey-secret' is required for cipher",
+                   optprefix ? optprefix : "");
        return -1;
    }
    /* QCow2 has no special header, since everything is hardwired */
--- a/crypto/block.c
+++ b/crypto/block.c
@@ -48,6 +48,7 @@ bool qcrypto_block_has_format(QCryptoBlockFormat format,


 QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions *options,
+                                 const char *optprefix,
                                 QCryptoBlockReadFunc readfunc,
                                 void *opaque,
                                 unsigned int flags,
@@ -67,7 +68,7 @@ QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions *options,

    block->driver = qcrypto_block_drivers[options->format];

-    if (block->driver->open(block, options,
+    if (block->driver->open(block, options, optprefix,
                            readfunc, opaque, flags, errp) < 0) {
        g_free(block);
        return NULL;
@@ -78,6 +79,7 @@ QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions *options,


 QCryptoBlock *qcrypto_block_create(QCryptoBlockCreateOptions *options,
+                                   const char *optprefix,
                                   QCryptoBlockInitFunc initfunc,
                                   QCryptoBlockWriteFunc writefunc,
                                   void *opaque,
@@ -97,7 +99,7 @@ QCryptoBlock *qcrypto_block_create(QCryptoBlockCreateOptions *options,

    block->driver = qcrypto_block_drivers[options->format];

-    if (block->driver->create(block, options, initfunc,
+    if (block->driver->create(block, options, optprefix, initfunc,
                              writefunc, opaque, errp) < 0) {
        g_free(block);
        return NULL;
--- a/crypto/blockpriv.h
+++ b/crypto/blockpriv.h
@@ -41,6 +41,7 @@ struct QCryptoBlock {
 struct QCryptoBlockDriver {
    int (*open)(QCryptoBlock *block,
                QCryptoBlockOpenOptions *options,
+                const char *optprefix,
                QCryptoBlockReadFunc readfunc,
                void *opaque,
                unsigned int flags,
@@ -48,6 +49,7 @@ struct QCryptoBlockDriver {

    int (*create)(QCryptoBlock *block,
                  QCryptoBlockCreateOptions *options,
+                  const char *optprefix,
                  QCryptoBlockInitFunc initfunc,
                  QCryptoBlockWriteFunc writefunc,
                  void *opaque,
--- a/crypto/cipher-afalg.c
+++ b/crypto/cipher-afalg.c
@@ -0,0 +1,226 @@
+/*
+ * QEMU Crypto af_alg-backend cipher support
+ *
+ * Copyright (c) 2017 HUAWEI TECHNOLOGIES CO., LTD.
+ *
+ * Authors:
+ *    Longpeng(Mike) <longpeng2@huawei.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu/sockets.h"
+#include "qemu-common.h"
+#include "qapi/error.h"
+#include "crypto/cipher.h"
+#include "cipherpriv.h"
+
+
+static char *
+qcrypto_afalg_cipher_format_name(QCryptoCipherAlgorithm alg,
+                                 QCryptoCipherMode mode,
+                                 Error **errp)
+{
+    char *name;
+    const char *alg_name;
+    const char *mode_name;
+
+    switch (alg) {
+    case QCRYPTO_CIPHER_ALG_AES_128:
+    case QCRYPTO_CIPHER_ALG_AES_192:
+    case QCRYPTO_CIPHER_ALG_AES_256:
+        alg_name = "aes";
+        break;
+    case QCRYPTO_CIPHER_ALG_CAST5_128:
+        alg_name = "cast5";
+        break;
+    case QCRYPTO_CIPHER_ALG_SERPENT_128:
+    case QCRYPTO_CIPHER_ALG_SERPENT_192:
+    case QCRYPTO_CIPHER_ALG_SERPENT_256:
+        alg_name = "serpent";
+        break;
+    case QCRYPTO_CIPHER_ALG_TWOFISH_128:
+    case QCRYPTO_CIPHER_ALG_TWOFISH_192:
+    case QCRYPTO_CIPHER_ALG_TWOFISH_256:
+        alg_name = "twofish";
+        break;
+
+    default:
+        error_setg(errp, "Unsupported cipher algorithm %d", alg);
+        return NULL;
+    }
+
+    mode_name = QCryptoCipherMode_lookup[mode];
+    name = g_strdup_printf("%s(%s)", mode_name, alg_name);
+
+    return name;
+}
+
+QCryptoAFAlg *
+qcrypto_afalg_cipher_ctx_new(QCryptoCipherAlgorithm alg,
+                             QCryptoCipherMode mode,
+                             const uint8_t *key,
+                             size_t nkey, Error **errp)
+{
+    QCryptoAFAlg *afalg;
+    size_t expect_niv;
+    char *name;
+
+    name = qcrypto_afalg_cipher_format_name(alg, mode, errp);
+    if (!name) {
+        return NULL;
+    }
+
+    afalg = qcrypto_afalg_comm_alloc(AFALG_TYPE_CIPHER, name, errp);
+    if (!afalg) {
+        g_free(name);
+        return NULL;
+    }
+
+    g_free(name);
+
+    /* setkey */
+    if (qemu_setsockopt(afalg->tfmfd, SOL_ALG, ALG_SET_KEY, key,
+                        nkey) != 0) {
+        error_setg_errno(errp, errno, "Set key failed");
+        qcrypto_afalg_comm_free(afalg);
+        return NULL;
+    }
+
+    /* prepare msg header */
+    afalg->msg = g_new0(struct msghdr, 1);
+    afalg->msg->msg_controllen += CMSG_SPACE(ALG_OPTYPE_LEN);
+    expect_niv = qcrypto_cipher_get_iv_len(alg, mode);
+    if (expect_niv) {
+        afalg->msg->msg_controllen += CMSG_SPACE(ALG_MSGIV_LEN(expect_niv));
+    }
+    afalg->msg->msg_control = g_new0(uint8_t, afalg->msg->msg_controllen);
+
+    /* We use 1st msghdr for crypto-info and 2nd msghdr for IV-info */
+    afalg->cmsg = CMSG_FIRSTHDR(afalg->msg);
+    afalg->cmsg->cmsg_type = ALG_SET_OP;
+    afalg->cmsg->cmsg_len = CMSG_SPACE(ALG_OPTYPE_LEN);
+    if (expect_niv) {
+        afalg->cmsg = CMSG_NXTHDR(afalg->msg, afalg->cmsg);
+        afalg->cmsg->cmsg_type = ALG_SET_IV;
+        afalg->cmsg->cmsg_len = CMSG_SPACE(ALG_MSGIV_LEN(expect_niv));
+    }
+    afalg->cmsg = CMSG_FIRSTHDR(afalg->msg);
+
+    return afalg;
+}
+
+static int
+qcrypto_afalg_cipher_setiv(QCryptoCipher *cipher,
+                           const uint8_t *iv,
+                           size_t niv, Error **errp)
+{
+    struct af_alg_iv *alg_iv;
+    size_t expect_niv;
+    QCryptoAFAlg *afalg = cipher->opaque;
+
+    expect_niv = qcrypto_cipher_get_iv_len(cipher->alg, cipher->mode);
+    if (niv != expect_niv) {
+        error_setg(errp, "Set IV len(%zu) not match expected(%zu)",
+                   niv, expect_niv);
+        return -1;
+    }
+
+    /* move ->cmsg to next msghdr, for IV-info */
+    afalg->cmsg = CMSG_NXTHDR(afalg->msg, afalg->cmsg);
+
+    /* build setiv msg */
+    afalg->cmsg->cmsg_level = SOL_ALG;
+    alg_iv = (struct af_alg_iv *)CMSG_DATA(afalg->cmsg);
+    alg_iv->ivlen = niv;
+    memcpy(alg_iv->iv, iv, niv);
+
+    return 0;
+}
+
+static int
+qcrypto_afalg_cipher_op(QCryptoAFAlg *afalg,
+                        const void *in, void *out,
+                        size_t len, bool do_encrypt,
+                        Error **errp)
+{
+    uint32_t *type = NULL;
+    struct iovec iov;
+    size_t ret, rlen, done = 0;
+    uint32_t origin_controllen;
+
+    origin_controllen = afalg->msg->msg_controllen;
+    /* movev ->cmsg to first header, for crypto-info */
+    afalg->cmsg = CMSG_FIRSTHDR(afalg->msg);
+
+    /* build encrypt msg */
+    afalg->cmsg->cmsg_level = SOL_ALG;
+    afalg->msg->msg_iov = &iov;
+    afalg->msg->msg_iovlen = 1;
+    type = (uint32_t *)CMSG_DATA(afalg->cmsg);
+    if (do_encrypt) {
+        *type = ALG_OP_ENCRYPT;
+    } else {
+        *type = ALG_OP_DECRYPT;
+    }
+
+    do {
+        iov.iov_base = (void *)in + done;
+        iov.iov_len = len - done;
+
+        /* send info to AF_ALG core */
+        ret = sendmsg(afalg->opfd, afalg->msg, 0);
+        if (ret == -1) {
+            error_setg_errno(errp, errno, "Send data to AF_ALG core failed");
+            return -1;
+        }
+
+        /* encrypto && get result */
+        rlen = read(afalg->opfd, out, ret);
+        if (rlen == -1) {
+            error_setg_errno(errp, errno, "Get result from AF_ALG core failed");
+            return -1;
+        }
+        assert(rlen == ret);
+
+        /* do not update IV for following chunks */
+        afalg->msg->msg_controllen = 0;
+        done += ret;
+    } while (done < len);
+
+    afalg->msg->msg_controllen = origin_controllen;
+
+    return 0;
+}
+
+static int
+qcrypto_afalg_cipher_encrypt(QCryptoCipher *cipher,
+                             const void *in, void *out,
+                             size_t len, Error **errp)
+{
+    return qcrypto_afalg_cipher_op(cipher->opaque, in, out,
+                                   len, true, errp);
+}
+
+static int
+qcrypto_afalg_cipher_decrypt(QCryptoCipher *cipher,
+                             const void *in, void *out,
+                             size_t len, Error **errp)
+{
+    return qcrypto_afalg_cipher_op(cipher->opaque, in, out,
+                                   len, false, errp);
+}
+
+static void qcrypto_afalg_comm_ctx_free(QCryptoCipher *cipher)
+{
+    qcrypto_afalg_comm_free(cipher->opaque);
+}
+
+struct QCryptoCipherDriver qcrypto_cipher_afalg_driver = {
+    .cipher_encrypt = qcrypto_afalg_cipher_encrypt,
+    .cipher_decrypt = qcrypto_afalg_cipher_decrypt,
+    .cipher_setiv = qcrypto_afalg_cipher_setiv,
+    .cipher_free = qcrypto_afalg_comm_ctx_free,
+};
--- a/crypto/cipher-builtin.c
+++ b/crypto/cipher-builtin.c
@@ -22,6 +22,7 @@
 #include "crypto/aes.h"
 #include "crypto/desrfb.h"
 #include "crypto/xts.h"
+#include "cipherpriv.h"

 typedef struct QCryptoCipherBuiltinAESContext QCryptoCipherBuiltinAESContext;
 struct QCryptoCipherBuiltinAESContext {
@@ -235,23 +236,24 @@ static int qcrypto_cipher_setiv_aes(QCryptoCipher *cipher,



-static int qcrypto_cipher_init_aes(QCryptoCipher *cipher,
-                                   const uint8_t *key, size_t nkey,
-                                   Error **errp)
+static QCryptoCipherBuiltin *
+qcrypto_cipher_init_aes(QCryptoCipherMode mode,
+                        const uint8_t *key, size_t nkey,
+                        Error **errp)
 {
    QCryptoCipherBuiltin *ctxt;

-    if (cipher->mode != QCRYPTO_CIPHER_MODE_CBC &&
-        cipher->mode != QCRYPTO_CIPHER_MODE_ECB &&
-        cipher->mode != QCRYPTO_CIPHER_MODE_XTS) {
+    if (mode != QCRYPTO_CIPHER_MODE_CBC &&
+        mode != QCRYPTO_CIPHER_MODE_ECB &&
+        mode != QCRYPTO_CIPHER_MODE_XTS) {
        error_setg(errp, "Unsupported cipher mode %s",
-                   QCryptoCipherMode_lookup[cipher->mode]);
-        return -1;
+                   QCryptoCipherMode_lookup[mode]);
+        return NULL;
    }

    ctxt = g_new0(QCryptoCipherBuiltin, 1);

-    if (cipher->mode == QCRYPTO_CIPHER_MODE_XTS) {
+    if (mode == QCRYPTO_CIPHER_MODE_XTS) {
        if (AES_set_encrypt_key(key, nkey * 4, &ctxt->state.aes.key.enc) != 0) {
            error_setg(errp, "Failed to set encryption key");
            goto error;
@@ -291,13 +293,11 @@ static int qcrypto_cipher_init_aes(QCryptoCipher *cipher,
    ctxt->encrypt = qcrypto_cipher_encrypt_aes;
    ctxt->decrypt = qcrypto_cipher_decrypt_aes;

-    cipher->opaque = ctxt;
-
-    return 0;
+    return ctxt;

 error:
    g_free(ctxt);
-    return -1;
+    return NULL;
 }


@@ -370,16 +370,17 @@ static int qcrypto_cipher_setiv_des_rfb(QCryptoCipher *cipher,
 }


-static int qcrypto_cipher_init_des_rfb(QCryptoCipher *cipher,
-                                       const uint8_t *key, size_t nkey,
-                                       Error **errp)
+static QCryptoCipherBuiltin *
+qcrypto_cipher_init_des_rfb(QCryptoCipherMode mode,
+                            const uint8_t *key, size_t nkey,
+                            Error **errp)
 {
    QCryptoCipherBuiltin *ctxt;

-    if (cipher->mode != QCRYPTO_CIPHER_MODE_ECB) {
+    if (mode != QCRYPTO_CIPHER_MODE_ECB) {
        error_setg(errp, "Unsupported cipher mode %s",
-                   QCryptoCipherMode_lookup[cipher->mode]);
-        return -1;
+                   QCryptoCipherMode_lookup[mode]);
+        return NULL;
    }

    ctxt = g_new0(QCryptoCipherBuiltin, 1);
@@ -394,9 +395,7 @@ static int qcrypto_cipher_init_des_rfb(QCryptoCipher *cipher,
    ctxt->encrypt = qcrypto_cipher_encrypt_des_rfb;
    ctxt->decrypt = qcrypto_cipher_decrypt_des_rfb;

-    cipher->opaque = ctxt;
-
-    return 0;
+    return ctxt;
 }


@@ -426,12 +425,13 @@ bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg,
 }


-QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
-                                  QCryptoCipherMode mode,
-                                  const uint8_t *key, size_t nkey,
-                                  Error **errp)
+static QCryptoCipherBuiltin *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg,
+                                                    QCryptoCipherMode mode,
+                                                    const uint8_t *key,
+                                                    size_t nkey,
+                                                    Error **errp)
 {
-    QCryptoCipher *cipher;
+    QCryptoCipherBuiltin *ctxt;

    switch (mode) {
    case QCRYPTO_CIPHER_MODE_ECB:
@@ -444,60 +444,45 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
        return NULL;
    }

-    cipher = g_new0(QCryptoCipher, 1);
-    cipher->alg = alg;
-    cipher->mode = mode;
-
    if (!qcrypto_cipher_validate_key_length(alg, mode, nkey, errp)) {
-        goto error;
+        return NULL;
    }

-    switch (cipher->alg) {
+    switch (alg) {
    case QCRYPTO_CIPHER_ALG_DES_RFB:
-        if (qcrypto_cipher_init_des_rfb(cipher, key, nkey, errp) < 0) {
-            goto error;
-        }
+        ctxt = qcrypto_cipher_init_des_rfb(mode, key, nkey, errp);
        break;
    case QCRYPTO_CIPHER_ALG_AES_128:
    case QCRYPTO_CIPHER_ALG_AES_192:
    case QCRYPTO_CIPHER_ALG_AES_256:
-        if (qcrypto_cipher_init_aes(cipher, key, nkey, errp) < 0) {
-            goto error;
-        }
+        ctxt = qcrypto_cipher_init_aes(mode, key, nkey, errp);
        break;
    default:
        error_setg(errp,
                   "Unsupported cipher algorithm %s",
-                   QCryptoCipherAlgorithm_lookup[cipher->alg]);
-        goto error;
+                   QCryptoCipherAlgorithm_lookup[alg]);
+        return NULL;
    }

-    return cipher;
-
- error:
-    g_free(cipher);
-    return NULL;
+    return ctxt;
 }

-void qcrypto_cipher_free(QCryptoCipher *cipher)
+static void
+qcrypto_builtin_cipher_ctx_free(QCryptoCipher *cipher)
 {
    QCryptoCipherBuiltin *ctxt;

-    if (!cipher) {
-        return;
-    }
-
    ctxt = cipher->opaque;
    ctxt->free(cipher);
-    g_free(cipher);
 }


-int qcrypto_cipher_encrypt(QCryptoCipher *cipher,
-                           const void *in,
-                           void *out,
-                           size_t len,
-                           Error **errp)
+static int
+qcrypto_builtin_cipher_encrypt(QCryptoCipher *cipher,
+                               const void *in,
+                               void *out,
+                               size_t len,
+                               Error **errp)
 {
    QCryptoCipherBuiltin *ctxt = cipher->opaque;

@@ -511,11 +496,12 @@ int qcrypto_cipher_encrypt(QCryptoCipher *cipher,
 }


-int qcrypto_cipher_decrypt(QCryptoCipher *cipher,
-                           const void *in,
-                           void *out,
-                           size_t len,
-                           Error **errp)
+static int
+qcrypto_builtin_cipher_decrypt(QCryptoCipher *cipher,
+                               const void *in,
+                               void *out,
+                               size_t len,
+                               Error **errp)
 {
    QCryptoCipherBuiltin *ctxt = cipher->opaque;

@@ -529,11 +515,20 @@ int qcrypto_cipher_decrypt(QCryptoCipher *cipher,
 }


-int qcrypto_cipher_setiv(QCryptoCipher *cipher,
-                         const uint8_t *iv, size_t niv,
-                         Error **errp)
+static int
+qcrypto_builtin_cipher_setiv(QCryptoCipher *cipher,
+                             const uint8_t *iv, size_t niv,
+                             Error **errp)
 {
    QCryptoCipherBuiltin *ctxt = cipher->opaque;

    return ctxt->setiv(cipher, iv, niv, errp);
 }
+
+
+static struct QCryptoCipherDriver qcrypto_cipher_lib_driver = {
+    .cipher_encrypt = qcrypto_builtin_cipher_encrypt,
+    .cipher_decrypt = qcrypto_builtin_cipher_decrypt,
+    .cipher_setiv = qcrypto_builtin_cipher_setiv,
+    .cipher_free = qcrypto_builtin_cipher_ctx_free,
+};
--- a/crypto/cipher-gcrypt.c
+++ b/crypto/cipher-gcrypt.c
@@ -20,6 +20,7 @@

 #include "qemu/osdep.h"
 #include "crypto/xts.h"
+#include "cipherpriv.h"

 #include <gcrypt.h>

@@ -64,12 +65,29 @@ struct QCryptoCipherGcrypt {
    uint8_t *iv;
 };

-QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
-                                  QCryptoCipherMode mode,
-                                  const uint8_t *key, size_t nkey,
-                                  Error **errp)
+static void
+qcrypto_gcrypt_cipher_free_ctx(QCryptoCipherGcrypt *ctx,
+                               QCryptoCipherMode mode)
+{
+    if (!ctx) {
+        return;
+    }
+
+    gcry_cipher_close(ctx->handle);
+    if (mode == QCRYPTO_CIPHER_MODE_XTS) {
+        gcry_cipher_close(ctx->tweakhandle);
+    }
+    g_free(ctx->iv);
+    g_free(ctx);
+}
+
+
+static QCryptoCipherGcrypt *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg,
+                                                   QCryptoCipherMode mode,
+                                                   const uint8_t *key,
+                                                   size_t nkey,
+                                                   Error **errp)
 {
-    QCryptoCipher *cipher;
    QCryptoCipherGcrypt *ctx;
    gcry_error_t err;
    int gcryalg, gcrymode;
@@ -146,10 +164,6 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
        return NULL;
    }

-    cipher = g_new0(QCryptoCipher, 1);
-    cipher->alg = alg;
-    cipher->mode = mode;
-
    ctx = g_new0(QCryptoCipherGcrypt, 1);

    err = gcry_cipher_open(&ctx->handle, gcryalg, gcrymode, 0);
@@ -158,7 +172,7 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
                   gcry_strerror(err));
        goto error;
    }
-    if (cipher->mode == QCRYPTO_CIPHER_MODE_XTS) {
+    if (mode == QCRYPTO_CIPHER_MODE_XTS) {
        err = gcry_cipher_open(&ctx->tweakhandle, gcryalg, gcrymode, 0);
        if (err != 0) {
            error_setg(errp, "Cannot initialize cipher: %s",
@@ -167,7 +181,7 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
        }
    }

-    if (cipher->alg == QCRYPTO_CIPHER_ALG_DES_RFB) {
+    if (alg == QCRYPTO_CIPHER_ALG_DES_RFB) {
        /* We're using standard DES cipher from gcrypt, so we need
         * to munge the key so that the results are the same as the
         * bizarre RFB variant of DES :-)
@@ -177,7 +191,7 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
        g_free(rfbkey);
        ctx->blocksize = 8;
    } else {
-        if (cipher->mode == QCRYPTO_CIPHER_MODE_XTS) {
+        if (mode == QCRYPTO_CIPHER_MODE_XTS) {
            nkey /= 2;
            err = gcry_cipher_setkey(ctx->handle, key, nkey);
            if (err != 0) {
@@ -194,7 +208,7 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
                       gcry_strerror(err));
            goto error;
        }
-        switch (cipher->alg) {
+        switch (alg) {
        case QCRYPTO_CIPHER_ALG_AES_128:
        case QCRYPTO_CIPHER_ALG_AES_192:
        case QCRYPTO_CIPHER_ALG_AES_256:
@@ -214,7 +228,7 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
        }
    }

-    if (cipher->mode == QCRYPTO_CIPHER_MODE_XTS) {
+    if (mode == QCRYPTO_CIPHER_MODE_XTS) {
        if (ctx->blocksize != XTS_BLOCK_SIZE) {
            error_setg(errp,
                       "Cipher block size %zu must equal XTS block size %d",
@@ -224,34 +238,18 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
        ctx->iv = g_new0(uint8_t, ctx->blocksize);
    }

-    cipher->opaque = ctx;
-    return cipher;
+    return ctx;

 error:
-    gcry_cipher_close(ctx->handle);
-    if (cipher->mode == QCRYPTO_CIPHER_MODE_XTS) {
-        gcry_cipher_close(ctx->tweakhandle);
-    }
-    g_free(ctx);
-    g_free(cipher);
+    qcrypto_gcrypt_cipher_free_ctx(ctx, mode);
    return NULL;
 }


-void qcrypto_cipher_free(QCryptoCipher *cipher)
+static void
+qcrypto_gcrypt_cipher_ctx_free(QCryptoCipher *cipher)
 {
-    QCryptoCipherGcrypt *ctx;
-    if (!cipher) {
-        return;
-    }
-    ctx = cipher->opaque;
-    gcry_cipher_close(ctx->handle);
-    if (cipher->mode == QCRYPTO_CIPHER_MODE_XTS) {
-        gcry_cipher_close(ctx->tweakhandle);
-    }
-    g_free(ctx->iv);
-    g_free(ctx);
-    g_free(cipher);
+    qcrypto_gcrypt_cipher_free_ctx(cipher->opaque, cipher->mode);
 }


@@ -275,11 +273,12 @@ static void qcrypto_gcrypt_xts_decrypt(const void *ctx,
    g_assert(err == 0);
 }

-int qcrypto_cipher_encrypt(QCryptoCipher *cipher,
-                           const void *in,
-                           void *out,
-                           size_t len,
-                           Error **errp)
+static int
+qcrypto_gcrypt_cipher_encrypt(QCryptoCipher *cipher,
+                              const void *in,
+                              void *out,
+                              size_t len,
+                              Error **errp)
 {
    QCryptoCipherGcrypt *ctx = cipher->opaque;
    gcry_error_t err;
@@ -310,11 +309,12 @@ int qcrypto_cipher_encrypt(QCryptoCipher *cipher,
 }


-int qcrypto_cipher_decrypt(QCryptoCipher *cipher,
-                           const void *in,
-                           void *out,
-                           size_t len,
-                           Error **errp)
+static int
+qcrypto_gcrypt_cipher_decrypt(QCryptoCipher *cipher,
+                              const void *in,
+                              void *out,
+                              size_t len,
+                              Error **errp)
 {
    QCryptoCipherGcrypt *ctx = cipher->opaque;
    gcry_error_t err;
@@ -344,9 +344,10 @@ int qcrypto_cipher_decrypt(QCryptoCipher *cipher,
    return 0;
 }

-int qcrypto_cipher_setiv(QCryptoCipher *cipher,
-                         const uint8_t *iv, size_t niv,
-                         Error **errp)
+static int
+qcrypto_gcrypt_cipher_setiv(QCryptoCipher *cipher,
+                            const uint8_t *iv, size_t niv,
+                            Error **errp)
 {
    QCryptoCipherGcrypt *ctx = cipher->opaque;
    gcry_error_t err;
@@ -380,3 +381,11 @@ int qcrypto_cipher_setiv(QCryptoCipher *cipher,

    return 0;
 }
+
+
+static struct QCryptoCipherDriver qcrypto_cipher_lib_driver = {
+    .cipher_encrypt = qcrypto_gcrypt_cipher_encrypt,
+    .cipher_decrypt = qcrypto_gcrypt_cipher_decrypt,
+    .cipher_setiv = qcrypto_gcrypt_cipher_setiv,
+    .cipher_free = qcrypto_gcrypt_cipher_ctx_free,
+};
--- a/crypto/cipher-nettle.c
+++ b/crypto/cipher-nettle.c
@@ -20,6 +20,7 @@

 #include "qemu/osdep.h"
 #include "crypto/xts.h"
+#include "cipherpriv.h"

 #include <nettle/nettle-types.h>
 #include <nettle/aes.h>
@@ -249,12 +250,26 @@ bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg,
 }


-QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
-                                  QCryptoCipherMode mode,
-                                  const uint8_t *key, size_t nkey,
-                                  Error **errp)
+static void
+qcrypto_nettle_cipher_free_ctx(QCryptoCipherNettle *ctx)
+{
+    if (!ctx) {
+        return;
+    }
+
+    g_free(ctx->iv);
+    g_free(ctx->ctx);
+    g_free(ctx->ctx_tweak);
+    g_free(ctx);
+}
+
+
+static QCryptoCipherNettle *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg,
+                                                   QCryptoCipherMode mode,
+                                                   const uint8_t *key,
+                                                   size_t nkey,
+                                                   Error **errp)
 {
-    QCryptoCipher *cipher;
    QCryptoCipherNettle *ctx;
    uint8_t *rfbkey;

@@ -274,12 +289,7 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
        return NULL;
    }

-    cipher = g_new0(QCryptoCipher, 1);
-    cipher->alg = alg;
-    cipher->mode = mode;
-
    ctx = g_new0(QCryptoCipherNettle, 1);
-    cipher->opaque = ctx;

    switch (alg) {
    case QCRYPTO_CIPHER_ALG_DES_RFB:
@@ -423,36 +433,30 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,

    ctx->iv = g_new0(uint8_t, ctx->blocksize);

-    return cipher;
+    return ctx;

 error:
-    qcrypto_cipher_free(cipher);
+    qcrypto_nettle_cipher_free_ctx(ctx);
    return NULL;
 }


-void qcrypto_cipher_free(QCryptoCipher *cipher)
+static void
+qcrypto_nettle_cipher_ctx_free(QCryptoCipher *cipher)
 {
    QCryptoCipherNettle *ctx;

-    if (!cipher) {
-        return;
-    }
-
    ctx = cipher->opaque;
-    g_free(ctx->iv);
-    g_free(ctx->ctx);
-    g_free(ctx->ctx_tweak);
-    g_free(ctx);
-    g_free(cipher);
+    qcrypto_nettle_cipher_free_ctx(ctx);
 }


-int qcrypto_cipher_encrypt(QCryptoCipher *cipher,
-                           const void *in,
-                           void *out,
-                           size_t len,
-                           Error **errp)
+static int
+qcrypto_nettle_cipher_encrypt(QCryptoCipher *cipher,
+                              const void *in,
+                              void *out,
+                              size_t len,
+                              Error **errp)
 {
    QCryptoCipherNettle *ctx = cipher->opaque;

@@ -494,11 +498,12 @@ int qcrypto_cipher_encrypt(QCryptoCipher *cipher,
 }


-int qcrypto_cipher_decrypt(QCryptoCipher *cipher,
-                           const void *in,
-                           void *out,
-                           size_t len,
-                           Error **errp)
+static int
+qcrypto_nettle_cipher_decrypt(QCryptoCipher *cipher,
+                              const void *in,
+                              void *out,
+                              size_t len,
+                              Error **errp)
 {
    QCryptoCipherNettle *ctx = cipher->opaque;

@@ -538,9 +543,10 @@ int qcrypto_cipher_decrypt(QCryptoCipher *cipher,
    return 0;
 }

-int qcrypto_cipher_setiv(QCryptoCipher *cipher,
-                         const uint8_t *iv, size_t niv,
-                         Error **errp)
+static int
+qcrypto_nettle_cipher_setiv(QCryptoCipher *cipher,
+                            const uint8_t *iv, size_t niv,
+                            Error **errp)
 {
    QCryptoCipherNettle *ctx = cipher->opaque;
    if (niv != ctx->blocksize) {
@@ -551,3 +557,11 @@ int qcrypto_cipher_setiv(QCryptoCipher *cipher,
    memcpy(ctx->iv, iv, niv);
    return 0;
 }
+
+
+static struct QCryptoCipherDriver qcrypto_cipher_lib_driver = {
+    .cipher_encrypt = qcrypto_nettle_cipher_encrypt,
+    .cipher_decrypt = qcrypto_nettle_cipher_decrypt,
+    .cipher_setiv = qcrypto_nettle_cipher_setiv,
+    .cipher_free = qcrypto_nettle_cipher_ctx_free,
+};
--- a/crypto/cipher.c
+++ b/crypto/cipher.c
@@ -21,6 +21,7 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "crypto/cipher.h"
+#include "cipherpriv.h"


 static size_t alg_key_len[QCRYPTO_CIPHER_ALG__MAX] = {
@@ -155,3 +156,82 @@ qcrypto_cipher_munge_des_rfb_key(const uint8_t *key,
 #else
 #include "crypto/cipher-builtin.c"
 #endif
+
+QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
+                                  QCryptoCipherMode mode,
+                                  const uint8_t *key, size_t nkey,
+                                  Error **errp)
+{
+    QCryptoCipher *cipher;
+    void *ctx = NULL;
+    Error *err2 = NULL;
+    QCryptoCipherDriver *drv = NULL;
+
+#ifdef CONFIG_AF_ALG
+    ctx = qcrypto_afalg_cipher_ctx_new(alg, mode, key, nkey, &err2);
+    if (ctx) {
+        drv = &qcrypto_cipher_afalg_driver;
+    }
+#endif
+
+    if (!ctx) {
+        ctx = qcrypto_cipher_ctx_new(alg, mode, key, nkey, errp);
+        if (!ctx) {
+            error_free(err2);
+            return NULL;
+        }
+
+        drv = &qcrypto_cipher_lib_driver;
+        error_free(err2);
+    }
+
+    cipher = g_new0(QCryptoCipher, 1);
+    cipher->alg = alg;
+    cipher->mode = mode;
+    cipher->opaque = ctx;
+    cipher->driver = (void *)drv;
+
+    return cipher;
+}
+
+
+int qcrypto_cipher_encrypt(QCryptoCipher *cipher,
+                           const void *in,
+                           void *out,
+                           size_t len,
+                           Error **errp)
+{
+    QCryptoCipherDriver *drv = cipher->driver;
+    return drv->cipher_encrypt(cipher, in, out, len, errp);
+}
+
+
+int qcrypto_cipher_decrypt(QCryptoCipher *cipher,
+                           const void *in,
+                           void *out,
+                           size_t len,
+                           Error **errp)
+{
+    QCryptoCipherDriver *drv = cipher->driver;
+    return drv->cipher_decrypt(cipher, in, out, len, errp);
+}
+
+
+int qcrypto_cipher_setiv(QCryptoCipher *cipher,
+                         const uint8_t *iv, size_t niv,
+                         Error **errp)
+{
+    QCryptoCipherDriver *drv = cipher->driver;
+    return drv->cipher_setiv(cipher, iv, niv, errp);
+}
+
+
+void qcrypto_cipher_free(QCryptoCipher *cipher)
+{
+    QCryptoCipherDriver *drv;
+    if (cipher) {
+        drv = cipher->driver;
+        drv->cipher_free(cipher);
+        g_free(cipher);
+    }
+}
--- a/crypto/cipherpriv.h
+++ b/crypto/cipherpriv.h
@@ -0,0 +1,56 @@
+/*
+ * QEMU Crypto cipher driver supports
+ *
+ * Copyright (c) 2017 HUAWEI TECHNOLOGIES CO., LTD.
+ *
+ * Authors:
+ *    Longpeng(Mike) <longpeng2@huawei.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ *
+ */
+
+#ifndef QCRYPTO_CIPHERPRIV_H
+#define QCRYPTO_CIPHERPRIV_H
+
+#include "qapi-types.h"
+
+typedef struct QCryptoCipherDriver QCryptoCipherDriver;
+
+struct QCryptoCipherDriver {
+    int (*cipher_encrypt)(QCryptoCipher *cipher,
+                          const void *in,
+                          void *out,
+                          size_t len,
+                          Error **errp);
+
+    int (*cipher_decrypt)(QCryptoCipher *cipher,
+                          const void *in,
+                          void *out,
+                          size_t len,
+                          Error **errp);
+
+    int (*cipher_setiv)(QCryptoCipher *cipher,
+                        const uint8_t *iv, size_t niv,
+                        Error **errp);
+
+    void (*cipher_free)(QCryptoCipher *cipher);
+};
+
+#ifdef CONFIG_AF_ALG
+
+#include "afalgpriv.h"
+
+extern QCryptoAFAlg *
+qcrypto_afalg_cipher_ctx_new(QCryptoCipherAlgorithm alg,
+                             QCryptoCipherMode mode,
+                             const uint8_t *key,
+                             size_t nkey, Error **errp);
+
+extern struct QCryptoCipherDriver qcrypto_cipher_afalg_driver;
+
+#endif
+
+#endif
--- a/crypto/hash-afalg.c
+++ b/crypto/hash-afalg.c
@@ -0,0 +1,214 @@
+/*
+ * QEMU Crypto af_alg-backend hash/hmac support
+ *
+ * Copyright (c) 2017 HUAWEI TECHNOLOGIES CO., LTD.
+ *
+ * Authors:
+ *    Longpeng(Mike) <longpeng2@huawei.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu/iov.h"
+#include "qemu/sockets.h"
+#include "qemu-common.h"
+#include "qapi/error.h"
+#include "crypto/hash.h"
+#include "crypto/hmac.h"
+#include "hashpriv.h"
+#include "hmacpriv.h"
+
+static char *
+qcrypto_afalg_hash_format_name(QCryptoHashAlgorithm alg,
+                               bool is_hmac,
+                               Error **errp)
+{
+    char *name;
+    const char *alg_name;
+
+    switch (alg) {
+    case QCRYPTO_HASH_ALG_MD5:
+        alg_name = "md5";
+        break;
+    case QCRYPTO_HASH_ALG_SHA1:
+        alg_name = "sha1";
+        break;
+    case QCRYPTO_HASH_ALG_SHA224:
+        alg_name = "sha224";
+        break;
+    case QCRYPTO_HASH_ALG_SHA256:
+        alg_name = "sha256";
+        break;
+    case QCRYPTO_HASH_ALG_SHA384:
+        alg_name = "sha384";
+        break;
+    case QCRYPTO_HASH_ALG_SHA512:
+        alg_name = "sha512";
+        break;
+    case QCRYPTO_HASH_ALG_RIPEMD160:
+        alg_name = "rmd160";
+        break;
+
+    default:
+        error_setg(errp, "Unsupported hash algorithm %d", alg);
+        return NULL;
+    }
+
+    if (is_hmac) {
+        name = g_strdup_printf("hmac(%s)", alg_name);
+    } else {
+        name = g_strdup_printf("%s", alg_name);
+    }
+
+    return name;
+}
+
+static QCryptoAFAlg *
+qcrypto_afalg_hash_hmac_ctx_new(QCryptoHashAlgorithm alg,
+                                const uint8_t *key, size_t nkey,
+                                bool is_hmac, Error **errp)
+{
+    QCryptoAFAlg *afalg;
+    char *name;
+
+    name = qcrypto_afalg_hash_format_name(alg, is_hmac, errp);
+    if (!name) {
+        return NULL;
+    }
+
+    afalg = qcrypto_afalg_comm_alloc(AFALG_TYPE_HASH, name, errp);
+    if (!afalg) {
+        g_free(name);
+        return NULL;
+    }
+
+    g_free(name);
+
+    /* HMAC needs setkey */
+    if (is_hmac) {
+        if (qemu_setsockopt(afalg->tfmfd, SOL_ALG, ALG_SET_KEY,
+                            key, nkey) != 0) {
+            error_setg_errno(errp, errno, "Set hmac key failed");
+            qcrypto_afalg_comm_free(afalg);
+            return NULL;
+        }
+    }
+
+    return afalg;
+}
+
+static QCryptoAFAlg *
+qcrypto_afalg_hash_ctx_new(QCryptoHashAlgorithm alg,
+                           Error **errp)
+{
+    return qcrypto_afalg_hash_hmac_ctx_new(alg, NULL, 0, false, errp);
+}
+
+QCryptoAFAlg *
+qcrypto_afalg_hmac_ctx_new(QCryptoHashAlgorithm alg,
+                           const uint8_t *key, size_t nkey,
+                           Error **errp)
+{
+    return qcrypto_afalg_hash_hmac_ctx_new(alg, key, nkey, true, errp);
+}
+
+static int
+qcrypto_afalg_hash_hmac_bytesv(QCryptoAFAlg *hmac,
+                               QCryptoHashAlgorithm alg,
+                               const struct iovec *iov,
+                               size_t niov, uint8_t **result,
+                               size_t *resultlen,
+                               Error **errp)
+{
+    QCryptoAFAlg *afalg;
+    struct iovec outv;
+    int ret = 0;
+    bool is_hmac = (hmac != NULL) ? true : false;
+    const int expect_len = qcrypto_hash_digest_len(alg);
+
+    if (*resultlen == 0) {
+        *resultlen = expect_len;
+        *result = g_new0(uint8_t, *resultlen);
+    } else if (*resultlen != expect_len) {
+        error_setg(errp,
+                   "Result buffer size %zu is not match hash %d",
+                   *resultlen, expect_len);
+        return -1;
+    }
+
+    if (is_hmac) {
+        afalg = hmac;
+    } else {
+        afalg = qcrypto_afalg_hash_ctx_new(alg, errp);
+        if (!afalg) {
+            return -1;
+        }
+    }
+
+    /* send data to kernel's crypto core */
+    ret = iov_send_recv(afalg->opfd, iov, niov,
+                        0, iov_size(iov, niov), true);
+    if (ret < 0) {
+        error_setg_errno(errp, errno, "Send data to afalg-core failed");
+        goto out;
+    }
+
+    /* hash && get result */
+    outv.iov_base = *result;
+    outv.iov_len = *resultlen;
+    ret = iov_send_recv(afalg->opfd, &outv, 1,
+                        0, iov_size(&outv, 1), false);
+    if (ret < 0) {
+        error_setg_errno(errp, errno, "Recv result from afalg-core failed");
+    } else {
+        ret = 0;
+    }
+
+out:
+    if (!is_hmac) {
+        qcrypto_afalg_comm_free(afalg);
+    }
+    return ret;
+}
+
+static int
+qcrypto_afalg_hash_bytesv(QCryptoHashAlgorithm alg,
+                          const struct iovec *iov,
+                          size_t niov, uint8_t **result,
+                          size_t *resultlen,
+                          Error **errp)
+{
+    return qcrypto_afalg_hash_hmac_bytesv(NULL, alg, iov, niov, result,
+                                          resultlen, errp);
+}
+
+static int
+qcrypto_afalg_hmac_bytesv(QCryptoHmac *hmac,
+                          const struct iovec *iov,
+                          size_t niov, uint8_t **result,
+                          size_t *resultlen,
+                          Error **errp)
+{
+    return qcrypto_afalg_hash_hmac_bytesv(hmac->opaque, hmac->alg,
+                                          iov, niov, result, resultlen,
+                                          errp);
+}
+
+static void qcrypto_afalg_hmac_ctx_free(QCryptoHmac *hmac)
+{
+    QCryptoAFAlg *afalg;
+
+    afalg = hmac->opaque;
+    qcrypto_afalg_comm_free(afalg);
+}
+
+QCryptoHashDriver qcrypto_hash_afalg_driver = {
+    .hash_bytesv = qcrypto_afalg_hash_bytesv,
+};
+
+QCryptoHmacDriver qcrypto_hmac_afalg_driver = {
+    .hmac_bytesv = qcrypto_afalg_hmac_bytesv,
+    .hmac_free = qcrypto_afalg_hmac_ctx_free,
+};
--- a/crypto/hash-gcrypt.c
+++ b/crypto/hash-gcrypt.c
@@ -22,6 +22,7 @@
 #include <gcrypt.h>
 #include "qapi/error.h"
 #include "crypto/hash.h"
+#include "hashpriv.h"


 static int qcrypto_hash_alg_map[QCRYPTO_HASH_ALG__MAX] = {
@@ -44,12 +45,13 @@ gboolean qcrypto_hash_supports(QCryptoHashAlgorithm alg)
 }


-int qcrypto_hash_bytesv(QCryptoHashAlgorithm alg,
-                        const struct iovec *iov,
-                        size_t niov,
-                        uint8_t **result,
-                        size_t *resultlen,
-                        Error **errp)
+static int
+qcrypto_gcrypt_hash_bytesv(QCryptoHashAlgorithm alg,
+                           const struct iovec *iov,
+                           size_t niov,
+                           uint8_t **result,
+                           size_t *resultlen,
+                           Error **errp)
 {
    int i, ret;
    gcry_md_hd_t md;
@@ -107,3 +109,8 @@ int qcrypto_hash_bytesv(QCryptoHashAlgorithm alg,
    gcry_md_close(md);
    return -1;
 }
+
+
+QCryptoHashDriver qcrypto_hash_lib_driver = {
+    .hash_bytesv = qcrypto_gcrypt_hash_bytesv,
+};
--- a/crypto/hash-glib.c
+++ b/crypto/hash-glib.c
@@ -21,6 +21,7 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "crypto/hash.h"
+#include "hashpriv.h"


 static int qcrypto_hash_alg_map[QCRYPTO_HASH_ALG__MAX] = {
@@ -47,12 +48,13 @@ gboolean qcrypto_hash_supports(QCryptoHashAlgorithm alg)
 }


-int qcrypto_hash_bytesv(QCryptoHashAlgorithm alg,
-                        const struct iovec *iov,
-                        size_t niov,
-                        uint8_t **result,
-                        size_t *resultlen,
-                        Error **errp)
+static int
+qcrypto_glib_hash_bytesv(QCryptoHashAlgorithm alg,
+                         const struct iovec *iov,
+                         size_t niov,
+                         uint8_t **result,
+                         size_t *resultlen,
+                         Error **errp)
 {
    int i, ret;
    GChecksum *cs;
@@ -95,3 +97,8 @@ int qcrypto_hash_bytesv(QCryptoHashAlgorithm alg,
    g_checksum_free(cs);
    return -1;
 }
+
+
+QCryptoHashDriver qcrypto_hash_lib_driver = {
+    .hash_bytesv = qcrypto_glib_hash_bytesv,
+};
--- a/crypto/hash-nettle.c
+++ b/crypto/hash-nettle.c
@@ -21,6 +21,7 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "crypto/hash.h"
+#include "hashpriv.h"
 #include <nettle/md5.h>
 #include <nettle/sha.h>
 #include <nettle/ripemd160.h>
@@ -103,12 +104,13 @@ gboolean qcrypto_hash_supports(QCryptoHashAlgorithm alg)
 }


-int qcrypto_hash_bytesv(QCryptoHashAlgorithm alg,
-                        const struct iovec *iov,
-                        size_t niov,
-                        uint8_t **result,
-                        size_t *resultlen,
-                        Error **errp)
+static int
+qcrypto_nettle_hash_bytesv(QCryptoHashAlgorithm alg,
+                           const struct iovec *iov,
+                           size_t niov,
+                           uint8_t **result,
+                           size_t *resultlen,
+                           Error **errp)
 {
    int i;
    union qcrypto_hash_ctx ctx;
@@ -152,3 +154,8 @@ int qcrypto_hash_bytesv(QCryptoHashAlgorithm alg,

    return 0;
 }
+
+
+QCryptoHashDriver qcrypto_hash_lib_driver = {
+    .hash_bytesv = qcrypto_nettle_hash_bytesv,
+};
--- a/Show More
+++ b/Show More