Revert "hw/i386/pc: Confine system flash handling to pc_sysfw"

Specifying the property `-M pflash0` results in a regression: qemu-system-x86_64: Property 'pc-q35-9.0-machine.pflash0' not found Revert the change for now until a solution is found. This reverts commit 6f6ad2b245. Reported-by: Volker Rümelin <vr_qemu@t-online.de> Signed-off-by: Bernhard Beschow <shentey@gmail.com> Message-Id: <20240226215909.30884-3-shentey@gmail.com> Tested-by: Alex Williamson <alex.williamson@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com> (cherry picked from commit f2cb9f34ad) References: XXX Signed-off-by: Dario Faggioli <dfaggioli@suse.com>
Revert "hw/i386/pc_sysfw: Inline pc_system_flash_create() and remove it"
2024-03-27 09:23:41 +01:00 · 2024-03-27 09:23:02 +01:00 · 2024-03-26 17:17:00 +01:00 · 2024-03-26 17:17:00 +01:00 · 2024-03-26 17:17:00 +01:00 · 2024-03-26 17:17:00 +01:00
290 changed files with 12546 additions and 1401 deletions
--- a/.gitlab-ci.d/cirrus/build.yml
+++ b/.gitlab-ci.d/cirrus/build.yml
@@ -21,7 +21,7 @@ build_task:
  install_script:
    - @UPDATE_COMMAND@
    - @INSTALL_COMMAND@ @PKGS@
-    - if test -n "@PYPI_PKGS@" ; then @PIP3@ install @PYPI_PKGS@ ; fi
+    - if test -n "@PYPI_PKGS@" ; then PYLIB=$(@PYTHON@ -c 'import sysconfig; print(sysconfig.get_path("stdlib"))'); rm -f $PYLIB/EXTERNALLY-MANAGED; @PIP3@ install @PYPI_PKGS@ ; fi
  clone_script:
    - git clone --depth 100 "$CI_REPOSITORY_URL" .
    - git fetch origin "$CI_COMMIT_REF_NAME"
--- a/.gitlab-ci.d/windows.yml
+++ b/.gitlab-ci.d/windows.yml
@@ -88,7 +88,6 @@
      $MINGW_TARGET-libpng
      $MINGW_TARGET-libssh
      $MINGW_TARGET-libtasn1
-      $MINGW_TARGET-libusb
      $MINGW_TARGET-lzo2
      $MINGW_TARGET-nettle
      $MINGW_TARGET-ninja
@@ -98,9 +97,8 @@
      $MINGW_TARGET-SDL2
      $MINGW_TARGET-SDL2_image
      $MINGW_TARGET-snappy
-      $MINGW_TARGET-spice
-      $MINGW_TARGET-usbredir
-      $MINGW_TARGET-zstd "
+      $MINGW_TARGET-zstd
+      $EXTRA_PACKAGES "
  - Write-Output "Running build at $(Get-Date -Format u)"
  - $env:CHERE_INVOKING = 'yes'  # Preserve the current working directory
  - $env:MSYS = 'winsymlinks:native' # Enable native Windows symlink
@@ -123,6 +121,8 @@ msys2-64bit:
  variables:
    MINGW_TARGET: mingw-w64-x86_64
    MSYSTEM: MINGW64
+    # msys2 only ship these packages for 64-bit, not 32-bit
+    EXTRA_PACKAGES: $MINGW_TARGET-libusb $MINGW_TARGET-usbredir $MINGW_TARGET-spice
    # do not remove "--without-default-devices"!
    # commit 9f8e6cad65a6 ("gitlab-ci: Speed up the msys2-64bit job by using --without-default-devices"
    # changed to compile QEMU with the --without-default-devices switch
@@ -131,11 +131,3 @@ msys2-64bit:
    # qTests don't run successfully with "--without-default-devices",
    # so let's exclude the qtests from CI for now.
    TEST_ARGS: --no-suite qtest
-
-msys2-32bit:
-  extends: .shared_msys2_builder
-  variables:
-    MINGW_TARGET: mingw-w64-i686
-    MSYSTEM: MINGW32
-    CONFIGURE_ARGS:  --target-list=ppc64-softmmu -Ddebug=false -Doptimization=0
-    TEST_ARGS: --no-suite qtest
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,12 +1,12 @@
 [submodule "roms/seabios"]
 	path = roms/seabios
-	url = https://gitlab.com/qemu-project/seabios.git/
+	url = https://github.com/openSUSE/qemu-seabios.git
 [submodule "roms/SLOF"]
 	path = roms/SLOF
-	url = https://gitlab.com/qemu-project/SLOF.git
+	url = https://github.com/openSUSE/qemu-SLOF.git
 [submodule "roms/ipxe"]
 	path = roms/ipxe
-	url = https://gitlab.com/qemu-project/ipxe.git
+	url = https://github.com/openSUSE/qemu-ipxe.git
 [submodule "roms/openbios"]
 	path = roms/openbios
 	url = https://gitlab.com/qemu-project/openbios.git
@@ -18,7 +18,7 @@
 	url = https://gitlab.com/qemu-project/u-boot.git
 [submodule "roms/skiboot"]
 	path = roms/skiboot
-	url = https://gitlab.com/qemu-project/skiboot.git
+	url = https://github.com/openSUSE/qemu-skiboot.git
 [submodule "roms/QemuMacDrivers"]
 	path = roms/QemuMacDrivers
 	url = https://gitlab.com/qemu-project/QemuMacDrivers.git
@@ -36,10 +36,25 @@
 	url = 	https://gitlab.com/qemu-project/opensbi.git
 [submodule "roms/qboot"]
 	path = roms/qboot
-	url = https://gitlab.com/qemu-project/qboot.git
+	url = https://github.com/openSUSE/qemu-qboot.git
 [submodule "roms/vbootrom"]
 	path = roms/vbootrom
 	url = https://gitlab.com/qemu-project/vbootrom.git
 [submodule "tests/lcitool/libvirt-ci"]
 	path = tests/lcitool/libvirt-ci
 	url = https://gitlab.com/libvirt/libvirt-ci.git
+[submodule "subprojects/berkeley-softfloat-3"]
+	path = subprojects/berkeley-softfloat-3
+	url = https://gitlab.com/qemu-project/berkeley-softfloat-3
+[submodule "subprojects/berkeley-testfloat-3"]
+	path = subprojects/berkeley-testfloat-3
+	url = https://gitlab.com/qemu-project/berkeley-testfloat-3
+[submodule "subprojects/dtc"]
+	path = subprojects/dtc
+	url = https://gitlab.com/qemu-project/dtc.git
+[submodule "subprojects/libvfio-user"]
+	path = subprojects/libvfio-user
+	url = https://gitlab.com/qemu-project/libvfio-user.git
+[submodule "subprojects/keycodemapdb"]
+	path = subprojects/keycodemapdb
+	url = https://gitlab.com/qemu-project/keycodemapdb.git
--- a/.obs/workflows.yml
+++ b/.obs/workflows.yml
@@ -0,0 +1,22 @@
+pr_workflow:
+  steps:
+    - branch_package:
+        source_project: Virtualization:Staging
+        source_package: qemu
+        target_project: Virtualization:Staging:PRs
+  filters:
+    event: pull_request
+    branches:
+      only:
+        - factory
+rebuild_workflow:
+  steps:
+    # Will automatically rebuild the package
+    - trigger_services:
+        project: Virtualization:Staging
+        package: qemu
+  filters:
+    event: push
+    branches:
+      only:
+        - factory
--- a/2
+++ b/2
@@ -1 +1 @@
-8.2.1
+8.2.2
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -69,16 +69,6 @@
 #define KVM_GUESTDBG_BLOCKIRQ 0
 #endif

-//#define DEBUG_KVM
-
-#ifdef DEBUG_KVM
-#define DPRINTF(fmt, ...) \
-    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
-#else
-#define DPRINTF(fmt, ...) \
-    do { } while (0)
-#endif
-
 struct KVMParkedVcpu {
    unsigned long vcpu_id;
    int kvm_fd;
@@ -101,6 +91,8 @@ bool kvm_msi_use_devid;
 bool kvm_has_guest_debug;
 static int kvm_sstep_flags;
 static bool kvm_immediate_exit;
+static bool kvm_guest_memfd_supported;
+static uint64_t kvm_supported_memory_attributes;
 static hwaddr kvm_max_slot_size = ~0;

 static const KVMCapabilityInfo kvm_required_capabilites[] = {
@@ -292,34 +284,69 @@ int kvm_physical_memory_addr_from_host(KVMState *s, void *ram,
 static int kvm_set_user_memory_region(KVMMemoryListener *kml, KVMSlot *slot, bool new)
 {
    KVMState *s = kvm_state;
-    struct kvm_userspace_memory_region mem;
+    struct kvm_userspace_memory_region2 mem;
+    static int cap_user_memory2 = -1;
    int ret;

+    if (cap_user_memory2 == -1) {
+        cap_user_memory2 = kvm_check_extension(s, KVM_CAP_USER_MEMORY2);
+    }
+
+    if (!cap_user_memory2 && slot->guest_memfd >= 0) {
+        error_report("%s, KVM doesn't support KVM_CAP_USER_MEMORY2,"
+                     " which is required by guest memfd!", __func__);
+        exit(1);
+    }
+
    mem.slot = slot->slot | (kml->as_id << 16);
    mem.guest_phys_addr = slot->start_addr;
    mem.userspace_addr = (unsigned long)slot->ram;
    mem.flags = slot->flags;
+    mem.guest_memfd = slot->guest_memfd;
+    mem.guest_memfd_offset = slot->guest_memfd_offset;

    if (slot->memory_size && !new && (mem.flags ^ slot->old_flags) & KVM_MEM_READONLY) {
        /* Set the slot size to 0 before setting the slot to the desired
         * value. This is needed based on KVM commit 75d61fbc. */
        mem.memory_size = 0;
-        ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
+
+        if (cap_user_memory2) {
+            ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION2, &mem);
+        } else {
+            ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
+        }
        if (ret < 0) {
            goto err;
        }
    }
    mem.memory_size = slot->memory_size;
-    ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
+    if (cap_user_memory2) {
+        ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION2, &mem);
+    } else {
+        ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
+    }
    slot->old_flags = mem.flags;
 err:
-    trace_kvm_set_user_memory(mem.slot, mem.flags, mem.guest_phys_addr,
-                              mem.memory_size, mem.userspace_addr, ret);
+    trace_kvm_set_user_memory(mem.slot >> 16, (uint16_t)mem.slot, mem.flags,
+                              mem.guest_phys_addr, mem.memory_size,
+                              mem.userspace_addr, mem.guest_memfd,
+                              mem.guest_memfd_offset, ret);
    if (ret < 0) {
-        error_report("%s: KVM_SET_USER_MEMORY_REGION failed, slot=%d,"
-                     " start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s",
-                     __func__, mem.slot, slot->start_addr,
-                     (uint64_t)mem.memory_size, strerror(errno));
+        if (cap_user_memory2) {
+                error_report("%s: KVM_SET_USER_MEMORY_REGION2 failed, slot=%d,"
+                        " start=0x%" PRIx64 ", size=0x%" PRIx64 ","
+                        " flags=0x%" PRIx32 ", guest_memfd=%" PRId32 ","
+                        " guest_memfd_offset=0x%" PRIx64 ": %s",
+                        __func__, mem.slot, slot->start_addr,
+                        (uint64_t)mem.memory_size, mem.flags,
+                        mem.guest_memfd, (uint64_t)mem.guest_memfd_offset,
+                        strerror(errno));
+        } else {
+                error_report("%s: KVM_SET_USER_MEMORY_REGION failed, slot=%d,"
+                            " start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s",
+                            __func__, mem.slot, slot->start_addr,
+                            (uint64_t)mem.memory_size, strerror(errno));
+        }
    }
    return ret;
 }
@@ -331,7 +358,7 @@ static int do_kvm_destroy_vcpu(CPUState *cpu)
    struct KVMParkedVcpu *vcpu = NULL;
    int ret = 0;

-    DPRINTF("kvm_destroy_vcpu\n");
+    trace_kvm_destroy_vcpu();

    ret = kvm_arch_destroy_vcpu(cpu);
    if (ret < 0) {
@@ -341,7 +368,7 @@ static int do_kvm_destroy_vcpu(CPUState *cpu)
    mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
    if (mmap_size < 0) {
        ret = mmap_size;
-        DPRINTF("KVM_GET_VCPU_MMAP_SIZE failed\n");
+        trace_kvm_failed_get_vcpu_mmap_size();
        goto err;
    }

@@ -391,6 +418,11 @@ static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id)
    return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id);
 }

+int __attribute__ ((weak)) kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp)
+{
+    return 0;
+}
+
 int kvm_init_vcpu(CPUState *cpu, Error **errp)
 {
    KVMState *s = kvm_state;
@@ -399,15 +431,27 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)

    trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));

+    /*
+     * tdx_pre_create_vcpu() may call cpu_x86_cpuid(). It in turn may call
+     * kvm_vm_ioctl(). Set cpu->kvm_state in advance to avoid NULL pointer
+     * dereference.
+     */
+    cpu->kvm_state = s;
+    ret = kvm_arch_pre_create_vcpu(cpu, errp);
+    if (ret < 0) {
+        cpu->kvm_state = NULL;
+        goto err;
+    }
+
    ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu));
    if (ret < 0) {
        error_setg_errno(errp, -ret, "kvm_init_vcpu: kvm_get_vcpu failed (%lu)",
                         kvm_arch_vcpu_id(cpu));
+        cpu->kvm_state = NULL;
        goto err;
    }

    cpu->kvm_fd = ret;
-    cpu->kvm_state = s;
    cpu->vcpu_dirty = true;
    cpu->dirty_pages = 0;
    cpu->throttle_us_per_full = 0;
@@ -443,7 +487,6 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
                                   PAGE_SIZE * KVM_DIRTY_LOG_PAGE_OFFSET);
        if (cpu->kvm_dirty_gfns == MAP_FAILED) {
            ret = -errno;
-            DPRINTF("mmap'ing vcpu dirty gfns failed: %d\n", ret);
            goto err;
        }
    }
@@ -475,6 +518,9 @@ static int kvm_mem_flags(MemoryRegion *mr)
    if (readonly && kvm_readonly_mem_allowed) {
        flags |= KVM_MEM_READONLY;
    }
+    if (memory_region_has_guest_memfd(mr)) {
+        flags |= KVM_MEM_GUEST_MEMFD;
+    }
    return flags;
 }

@@ -1130,6 +1176,11 @@ int kvm_vm_check_extension(KVMState *s, unsigned int extension)
    return ret;
 }

+/*
+ * We track the poisoned pages to be able to:
+ * - replace them on VM reset
+ * - block a migration for a VM with a poisoned page
+ */
 typedef struct HWPoisonPage {
    ram_addr_t ram_addr;
    QLIST_ENTRY(HWPoisonPage) list;
@@ -1163,6 +1214,11 @@ void kvm_hwpoison_page_add(ram_addr_t ram_addr)
    QLIST_INSERT_HEAD(&hwpoison_page_list, page, list);
 }

+bool kvm_hwpoisoned_mem(void)
+{
+    return !QLIST_EMPTY(&hwpoison_page_list);
+}
+
 static uint32_t adjust_ioeventfd_endianness(uint32_t val, uint32_t size)
 {
 #if HOST_BIG_ENDIAN != TARGET_BIG_ENDIAN
@@ -1266,6 +1322,46 @@ void kvm_set_max_memslot_size(hwaddr max_slot_size)
    kvm_max_slot_size = max_slot_size;
 }

+static int kvm_set_memory_attributes(hwaddr start, hwaddr size, uint64_t attr)
+{
+    struct kvm_memory_attributes attrs;
+    int r;
+
+    if (kvm_supported_memory_attributes == 0) {
+        error_report("No memory attribute supported by KVM\n");
+        return -EINVAL;
+    }
+
+    if ((attr & kvm_supported_memory_attributes) != attr) {
+        error_report("memory attribute 0x%lx not supported by KVM,"
+                     " supported bits are 0x%lx\n",
+                     attr, kvm_supported_memory_attributes);
+        return -EINVAL;
+    }
+
+    attrs.attributes = attr;
+    attrs.address = start;
+    attrs.size = size;
+    attrs.flags = 0;
+
+    r = kvm_vm_ioctl(kvm_state, KVM_SET_MEMORY_ATTRIBUTES, &attrs);
+    if (r) {
+        error_report("failed to set memory (0x%lx+%#zx) with attr 0x%lx error '%s'",
+                     start, size, attr, strerror(errno));
+    }
+    return r;
+}
+
+int kvm_set_memory_attributes_private(hwaddr start, hwaddr size)
+{
+    return kvm_set_memory_attributes(start, size, KVM_MEMORY_ATTRIBUTE_PRIVATE);
+}
+
+int kvm_set_memory_attributes_shared(hwaddr start, hwaddr size)
+{
+    return kvm_set_memory_attributes(start, size, 0);
+}
+
 /* Called with KVMMemoryListener.slots_lock held */
 static void kvm_set_phys_mem(KVMMemoryListener *kml,
                             MemoryRegionSection *section, bool add)
@@ -1362,6 +1458,9 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
        mem->ram_start_offset = ram_start_offset;
        mem->ram = ram;
        mem->flags = kvm_mem_flags(mr);
+        mem->guest_memfd = mr->ram_block->guest_memfd;
+        mem->guest_memfd_offset = (uint8_t*)ram - mr->ram_block->host;
+
        kvm_slot_init_dirty_bitmap(mem);
        err = kvm_set_user_memory_region(kml, mem, true);
        if (err) {
@@ -1369,6 +1468,16 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
                    strerror(-err));
            abort();
        }
+
+        if (memory_region_has_guest_memfd(mr)) {
+            err = kvm_set_memory_attributes_private(start_addr, slot_size);
+            if (err) {
+                error_report("%s: failed to set memory attribute private: %s\n",
+                             __func__, strerror(-err));
+                exit(1);
+            }
+        }
+
        start_addr += slot_size;
        ram_start_offset += slot_size;
        ram += slot_size;
@@ -2396,6 +2505,11 @@ static int kvm_init(MachineState *ms)
    }
    s->as = g_new0(struct KVMAs, s->nr_as);

+    kvm_guest_memfd_supported = kvm_check_extension(s, KVM_CAP_GUEST_MEMFD);
+
+    ret = kvm_check_extension(s, KVM_CAP_MEMORY_ATTRIBUTES);
+    kvm_supported_memory_attributes = ret > 0 ? ret : 0;
+
    if (object_property_find(OBJECT(current_machine), "kvm-type")) {
        g_autofree char *kvm_type = object_property_get_str(OBJECT(current_machine),
                                                            "kvm-type",
@@ -2816,12 +2930,101 @@ static void kvm_eat_signals(CPUState *cpu)
    } while (sigismember(&chkset, SIG_IPI));
 }

+int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private)
+{
+    MemoryRegionSection section;
+    ram_addr_t offset;
+    MemoryRegion *mr;
+    RAMBlock *rb;
+    void *addr;
+    int ret = -1;
+
+    trace_kvm_convert_memory(start, size, to_private ? "shared_to_private" : "private_to_shared");
+
+    if (!QEMU_PTR_IS_ALIGNED(start, qemu_host_page_size) ||
+        !QEMU_PTR_IS_ALIGNED(size, qemu_host_page_size)) {
+        return -1;
+    }
+
+    if (!size) {
+        return -1;
+    }
+
+    section = memory_region_find(get_system_memory(), start, size);
+    mr = section.mr;
+    if (!mr) {
+        /*
+         * Ignore converting non-assigned region to shared.
+         *
+         * TDX requires vMMIO region to be shared to inject #VE to guest.
+         * OVMF issues conservatively MapGPA(shared) on 32bit PCI MMIO region,
+         * and vIO-APIC 0xFEC00000 4K page.
+         * OVMF assigns 32bit PCI MMIO region to
+         * [top of low memory: typically 2GB=0xC000000,  0xFC00000)
+         */
+        if (!to_private) {
+            return 0;
+        }
+        return -1;
+    }
+
+    if (memory_region_has_guest_memfd(mr)) {
+        if (to_private) {
+            ret = kvm_set_memory_attributes_private(start, size);
+        } else {
+            ret = kvm_set_memory_attributes_shared(start, size);
+        }
+
+        if (ret) {
+            memory_region_unref(section.mr);
+            return ret;
+        }
+
+        addr = memory_region_get_ram_ptr(mr) + section.offset_within_region;
+        rb = qemu_ram_block_from_host(addr, false, &offset);
+
+        if (to_private) {
+            if (rb->page_size != qemu_host_page_size) {
+                /*
+                * shared memory is back'ed by  hugetlb, which is supposed to be
+                * pre-allocated and doesn't need to be discarded
+                */
+                return 0;
+            } else {
+                ret = ram_block_discard_range(rb, offset, size);
+            }
+        } else {
+            ret = ram_block_discard_guest_memfd_range(rb, offset, size);
+        }
+    } else {
+        /*
+         * Because vMMIO region must be shared, guest TD may convert vMMIO
+         * region to shared explicitly.  Don't complain such case.  See
+         * memory_region_type() for checking if the region is MMIO region.
+         */
+        if (!to_private &&
+            !memory_region_is_ram(mr) &&
+            !memory_region_is_ram_device(mr) &&
+            !memory_region_is_rom(mr) &&
+            !memory_region_is_romd(mr)) {
+		    ret = 0;
+        } else {
+            error_report("Convert non guest_memfd backed memory region "
+                        "(0x%"HWADDR_PRIx" ,+ 0x%"HWADDR_PRIx") to %s",
+                        start, size, to_private ? "private" : "shared");
+        }
+    }
+
+    memory_region_unref(section.mr);
+    return ret;
+}
+
 int kvm_cpu_exec(CPUState *cpu)
 {
    struct kvm_run *run = cpu->kvm_run;
    int ret, run_ret;

-    DPRINTF("kvm_cpu_exec()\n");
+    trace_kvm_cpu_exec();

    if (kvm_arch_process_async_events(cpu)) {
        qatomic_set(&cpu->exit_request, 0);
@@ -2848,7 +3051,7 @@ int kvm_cpu_exec(CPUState *cpu)

        kvm_arch_pre_run(cpu, run);
        if (qatomic_read(&cpu->exit_request)) {
-            DPRINTF("interrupt exit requested\n");
+	    trace_kvm_interrupt_exit_request();
            /*
             * KVM requires us to reenter the kernel after IO exits to complete
             * instruction emulation. This self-signal will ensure that we
@@ -2878,29 +3081,30 @@ int kvm_cpu_exec(CPUState *cpu)

        if (run_ret < 0) {
            if (run_ret == -EINTR || run_ret == -EAGAIN) {
-                DPRINTF("io window exit\n");
+                trace_kvm_io_window_exit();
                kvm_eat_signals(cpu);
                ret = EXCP_INTERRUPT;
                break;
            }
-            fprintf(stderr, "error: kvm run failed %s\n",
-                    strerror(-run_ret));
+            if (!(run_ret == -EFAULT && run->exit_reason == KVM_EXIT_MEMORY_FAULT)) {
+                fprintf(stderr, "error: kvm run failed %s\n",
+                        strerror(-run_ret));
 #ifdef TARGET_PPC
-            if (run_ret == -EBUSY) {
-                fprintf(stderr,
-                        "This is probably because your SMT is enabled.\n"
-                        "VCPU can only run on primary threads with all "
-                        "secondary threads offline.\n");
-            }
+                if (run_ret == -EBUSY) {
+                    fprintf(stderr,
+                            "This is probably because your SMT is enabled.\n"
+                            "VCPU can only run on primary threads with all "
+                            "secondary threads offline.\n");
+                }
 #endif
-            ret = -1;
-            break;
+                ret = -1;
+                break;
+            }
        }

        trace_kvm_run_exit(cpu->cpu_index, run->exit_reason);
        switch (run->exit_reason) {
        case KVM_EXIT_IO:
-            DPRINTF("handle_io\n");
            /* Called outside BQL */
            kvm_handle_io(run->io.port, attrs,
                          (uint8_t *)run + run->io.data_offset,
@@ -2910,7 +3114,6 @@ int kvm_cpu_exec(CPUState *cpu)
            ret = 0;
            break;
        case KVM_EXIT_MMIO:
-            DPRINTF("handle_mmio\n");
            /* Called outside BQL */
            address_space_rw(&address_space_memory,
                             run->mmio.phys_addr, attrs,
@@ -2920,11 +3123,9 @@ int kvm_cpu_exec(CPUState *cpu)
            ret = 0;
            break;
        case KVM_EXIT_IRQ_WINDOW_OPEN:
-            DPRINTF("irq_window_open\n");
            ret = EXCP_INTERRUPT;
            break;
        case KVM_EXIT_SHUTDOWN:
-            DPRINTF("shutdown\n");
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            ret = EXCP_INTERRUPT;
            break;
@@ -2959,6 +3160,7 @@ int kvm_cpu_exec(CPUState *cpu)
            ret = 0;
            break;
        case KVM_EXIT_SYSTEM_EVENT:
+            trace_kvm_run_exit_system_event(cpu->cpu_index, run->system_event.type);
            switch (run->system_event.type) {
            case KVM_SYSTEM_EVENT_SHUTDOWN:
                qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
@@ -2976,13 +3178,21 @@ int kvm_cpu_exec(CPUState *cpu)
                ret = 0;
                break;
            default:
-                DPRINTF("kvm_arch_handle_exit\n");
                ret = kvm_arch_handle_exit(cpu, run);
                break;
            }
            break;
+        case KVM_EXIT_MEMORY_FAULT:
+            if (run->memory_fault.flags & ~KVM_MEMORY_EXIT_FLAG_PRIVATE) {
+                error_report("KVM_EXIT_MEMORY_FAULT: Unknown flag 0x%" PRIx64,
+                             (uint64_t)run->memory_fault.flags);
+                ret = -1;
+                break;
+            }
+            ret = kvm_convert_memory(run->memory_fault.gpa, run->memory_fault.size,
+                                     run->memory_fault.flags & KVM_MEMORY_EXIT_FLAG_PRIVATE);
+            break;
        default:
-            DPRINTF("kvm_arch_handle_exit\n");
            ret = kvm_arch_handle_exit(cpu, run);
            break;
        }
@@ -4077,3 +4287,25 @@ void query_stats_schemas_cb(StatsSchemaList **result, Error **errp)
        query_stats_schema_vcpu(first_cpu, &stats_args);
    }
 }
+
+int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp)
+{
+    int fd;
+    struct kvm_create_guest_memfd guest_memfd = {
+        .size = size,
+        .flags = flags,
+    };
+
+    if (!kvm_guest_memfd_supported) {
+        error_setg(errp, "KVM doesn't support guest memfd\n");
+        return -1;
+    }
+
+    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_GUEST_MEMFD, &guest_memfd);
+    if (fd < 0) {
+        error_setg_errno(errp, errno, "Error creating kvm guest memfd");
+        return -1;
+    }
+
+    return fd;
+}
--- a/accel/kvm/trace-events
+++ b/accel/kvm/trace-events
@@ -15,7 +15,7 @@ kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d"
 kvm_irqchip_release_virq(int virq) "virq %d"
 kvm_set_ioeventfd_mmio(int fd, uint64_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%" PRIx64 " val=0x%x assign: %d size: %d match: %d"
 kvm_set_ioeventfd_pio(int fd, uint16_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%x val=0x%x assign: %d size: %d match: %d"
-kvm_set_user_memory(uint32_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, int ret) "Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " ret=%d"
+kvm_set_user_memory(uint16_t as, uint16_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, uint32_t fd, uint64_t fd_offset, int ret) "AddrSpace#%d Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " guest_memfd=%d" " guest_memfd_offset=0x%" PRIx64 " ret=%d"
 kvm_clear_dirty_log(uint32_t slot, uint64_t start, uint32_t size) "slot#%"PRId32" start 0x%"PRIx64" size 0x%"PRIx32
 kvm_resample_fd_notify(int gsi) "gsi %d"
 kvm_dirty_ring_full(int id) "vcpu %d"
@@ -25,4 +25,10 @@ kvm_dirty_ring_reaper(const char *s) "%s"
 kvm_dirty_ring_reap(uint64_t count, int64_t t) "reaped %"PRIu64" pages (took %"PRIi64" us)"
 kvm_dirty_ring_reaper_kick(const char *reason) "%s"
 kvm_dirty_ring_flush(int finished) "%d"
-
+kvm_destroy_vcpu(void) ""
+kvm_failed_get_vcpu_mmap_size(void) ""
+kvm_cpu_exec(void) ""
+kvm_interrupt_exit_request(void) ""
+kvm_io_window_exit(void) ""
+kvm_run_exit_system_event(int cpu_index, uint32_t event_type) "cpu_index %d, system_even_type %"PRIu32
+kvm_convert_memory(uint64_t start, uint64_t size, const char *msg) "start 0x%" PRIx64 " size 0x%" PRIx64 " %s"
--- a/accel/stubs/kvm-stub.c
+++ b/accel/stubs/kvm-stub.c
@@ -124,3 +124,13 @@ uint32_t kvm_dirty_ring_size(void)
 {
    return 0;
 }
+
+bool kvm_hwpoisoned_mem(void)
+{
+    return false;
+}
+
+int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp)
+{
+    return -ENOSYS;
+}
--- a/audio/meson.build
+++ b/audio/meson.build
@@ -30,7 +30,8 @@ endforeach

 if dbus_display
    module_ss = ss.source_set()
-    module_ss.add(when: [gio, pixman], if_true: files('dbusaudio.c'))
+    module_ss.add(when: [gio, dbus_display1_dep, pixman],
+                  if_true: files('dbusaudio.c'))
    audio_modules += {'dbus': module_ss}
 endif

--- a/backends/hostmem-epc.c
+++ b/backends/hostmem-epc.c
@@ -17,31 +17,29 @@
 #include "sysemu/hostmem.h"
 #include "hw/i386/hostmem-epc.h"

-static void
+static bool
 sgx_epc_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
 {
+    g_autofree char *name = NULL;
    uint32_t ram_flags;
-    char *name;
    int fd;

    if (!backend->size) {
        error_setg(errp, "can't create backend with size 0");
-        return;
+        return false;
    }

    fd = qemu_open_old("/dev/sgx_vepc", O_RDWR);
    if (fd < 0) {
        error_setg_errno(errp, errno,
                         "failed to open /dev/sgx_vepc to alloc SGX EPC");
-        return;
+        return false;
    }

    name = object_get_canonical_path(OBJECT(backend));
    ram_flags = (backend->share ? RAM_SHARED : 0) | RAM_PROTECTED;
-    memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend),
-                                   name, backend->size, ram_flags,
-                                   fd, 0, errp);
-    g_free(name);
+    return memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), name,
+                                          backend->size, ram_flags, fd, 0, errp);
 }

 static void sgx_epc_backend_instance_init(Object *obj)
--- a/backends/hostmem-file.c
+++ b/backends/hostmem-file.c
@@ -36,24 +36,25 @@ struct HostMemoryBackendFile {
    OnOffAuto rom;
 };

-static void
+static bool
 file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
 {
 #ifndef CONFIG_POSIX
    error_setg(errp, "backend '%s' not supported on this host",
               object_get_typename(OBJECT(backend)));
+    return false;
 #else
    HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(backend);
+    g_autofree gchar *name = NULL;
    uint32_t ram_flags;
-    gchar *name;

    if (!backend->size) {
        error_setg(errp, "can't create backend with size 0");
-        return;
+        return false;
    }
    if (!fb->mem_path) {
        error_setg(errp, "mem-path property not set");
-        return;
+        return false;
    }

    switch (fb->rom) {
@@ -65,18 +66,18 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
        if (!fb->readonly) {
            error_setg(errp, "property 'rom' = 'on' is not supported with"
                       " 'readonly' = 'off'");
-            return;
+            return false;
        }
        break;
    case ON_OFF_AUTO_OFF:
        if (fb->readonly && backend->share) {
            error_setg(errp, "property 'rom' = 'off' is incompatible with"
                       " 'readonly' = 'on' and 'share' = 'on'");
-            return;
+            return false;
        }
        break;
    default:
-        assert(false);
+        g_assert_not_reached();
    }

    name = host_memory_backend_get_name(backend);
@@ -84,12 +85,12 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
    ram_flags |= fb->readonly ? RAM_READONLY_FD : 0;
    ram_flags |= fb->rom == ON_OFF_AUTO_ON ? RAM_READONLY : 0;
    ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
+    ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0;
    ram_flags |= fb->is_pmem ? RAM_PMEM : 0;
    ram_flags |= RAM_NAMED_FILE;
-    memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), name,
-                                     backend->size, fb->align, ram_flags,
-                                     fb->mem_path, fb->offset, errp);
-    g_free(name);
+    return memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), name,
+                                            backend->size, fb->align, ram_flags,
+                                            fb->mem_path, fb->offset, errp);
 #endif
 }

--- a/backends/hostmem-memfd.c
+++ b/backends/hostmem-memfd.c
@@ -31,17 +31,17 @@ struct HostMemoryBackendMemfd {
    bool seal;
 };

-static void
+static bool
 memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
 {
    HostMemoryBackendMemfd *m = MEMORY_BACKEND_MEMFD(backend);
+    g_autofree char *name = NULL;
    uint32_t ram_flags;
-    char *name;
    int fd;

    if (!backend->size) {
        error_setg(errp, "can't create backend with size 0");
-        return;
+        return false;
    }

    fd = qemu_memfd_create(TYPE_MEMORY_BACKEND_MEMFD, backend->size,
@@ -49,15 +49,15 @@ memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
                           F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL : 0,
                           errp);
    if (fd == -1) {
-        return;
+        return false;
    }

    name = host_memory_backend_get_name(backend);
    ram_flags = backend->share ? RAM_SHARED : 0;
    ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
-    memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), name,
-                                   backend->size, ram_flags, fd, 0, errp);
-    g_free(name);
+    ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0;
+    return memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), name,
+                                          backend->size, ram_flags, fd, 0, errp);
 }

 static bool
--- a/backends/hostmem-ram.c
+++ b/backends/hostmem-ram.c
@@ -16,23 +16,24 @@
 #include "qemu/module.h"
 #include "qom/object_interfaces.h"

-static void
+static bool
 ram_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
 {
+    g_autofree char *name = NULL;
    uint32_t ram_flags;
-    char *name;

    if (!backend->size) {
        error_setg(errp, "can't create backend with size 0");
-        return;
+        return false;
    }

    name = host_memory_backend_get_name(backend);
    ram_flags = backend->share ? RAM_SHARED : 0;
    ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
-    memory_region_init_ram_flags_nomigrate(&backend->mr, OBJECT(backend), name,
-                                           backend->size, ram_flags, errp);
-    g_free(name);
+    ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0;
+    return memory_region_init_ram_flags_nomigrate(&backend->mr, OBJECT(backend),
+                                                  name, backend->size,
+                                                  ram_flags, errp);
 }

 static void
--- a/backends/hostmem.c
+++ b/backends/hostmem.c
@@ -219,7 +219,6 @@ static bool host_memory_backend_get_prealloc(Object *obj, Error **errp)
 static void host_memory_backend_set_prealloc(Object *obj, bool value,
                                             Error **errp)
 {
-    Error *local_err = NULL;
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

    if (!backend->reserve && value) {
@@ -237,10 +236,8 @@ static void host_memory_backend_set_prealloc(Object *obj, bool value,
        void *ptr = memory_region_get_ram_ptr(&backend->mr);
        uint64_t sz = memory_region_size(&backend->mr);

-        qemu_prealloc_mem(fd, ptr, sz, backend->prealloc_threads,
-                          backend->prealloc_context, &local_err);
-        if (local_err) {
-            error_propagate(errp, local_err);
+        if (!qemu_prealloc_mem(fd, ptr, sz, backend->prealloc_threads,
+                               backend->prealloc_context, errp)) {
            return;
        }
        backend->prealloc = true;
@@ -279,6 +276,7 @@ static void host_memory_backend_init(Object *obj)
    /* TODO: convert access to globals to compat properties */
    backend->merge = machine_mem_merge(machine);
    backend->dump = machine_dump_guest_core(machine);
+    backend->guest_memfd = machine_require_guest_memfd(machine);
    backend->reserve = true;
    backend->prealloc_threads = machine->smp.cpus;
 }
@@ -324,91 +322,86 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
 {
    HostMemoryBackend *backend = MEMORY_BACKEND(uc);
    HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc);
-    Error *local_err = NULL;
    void *ptr;
    uint64_t sz;

-    if (bc->alloc) {
-        bc->alloc(backend, &local_err);
-        if (local_err) {
-            goto out;
-        }
+    if (!bc->alloc) {
+        return;
+    }
+    if (!bc->alloc(backend, errp)) {
+        return;
+    }

-        ptr = memory_region_get_ram_ptr(&backend->mr);
-        sz = memory_region_size(&backend->mr);
+    ptr = memory_region_get_ram_ptr(&backend->mr);
+    sz = memory_region_size(&backend->mr);

-        if (backend->merge) {
-            qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE);
-        }
-        if (!backend->dump) {
-            qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP);
-        }
+    if (backend->merge) {
+        qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE);
+    }
+    if (!backend->dump) {
+        qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP);
+    }
 #ifdef CONFIG_NUMA
-        unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES);
-        /* lastbit == MAX_NODES means maxnode = 0 */
-        unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1);
-        /* ensure policy won't be ignored in case memory is preallocated
-         * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so
-         * this doesn't catch hugepage case. */
-        unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE;
-        int mode = backend->policy;
+    unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES);
+    /* lastbit == MAX_NODES means maxnode = 0 */
+    unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1);
+    /* ensure policy won't be ignored in case memory is preallocated
+     * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so
+     * this doesn't catch hugepage case. */
+    unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE;
+    int mode = backend->policy;

-        /* check for invalid host-nodes and policies and give more verbose
-         * error messages than mbind(). */
-        if (maxnode && backend->policy == MPOL_DEFAULT) {
-            error_setg(errp, "host-nodes must be empty for policy default,"
-                       " or you should explicitly specify a policy other"
-                       " than default");
-            return;
-        } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) {
-            error_setg(errp, "host-nodes must be set for policy %s",
-                       HostMemPolicy_str(backend->policy));
-            return;
-        }
+    /* check for invalid host-nodes and policies and give more verbose
+     * error messages than mbind(). */
+    if (maxnode && backend->policy == MPOL_DEFAULT) {
+        error_setg(errp, "host-nodes must be empty for policy default,"
+                   " or you should explicitly specify a policy other"
+                   " than default");
+        return;
+    } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) {
+        error_setg(errp, "host-nodes must be set for policy %s",
+                   HostMemPolicy_str(backend->policy));
+        return;
+    }

-        /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1
-         * as argument to mbind() due to an old Linux bug (feature?) which
-         * cuts off the last specified node. This means backend->host_nodes
-         * must have MAX_NODES+1 bits available.
-         */
-        assert(sizeof(backend->host_nodes) >=
-               BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long));
-        assert(maxnode <= MAX_NODES);
+    /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1
+     * as argument to mbind() due to an old Linux bug (feature?) which
+     * cuts off the last specified node. This means backend->host_nodes
+     * must have MAX_NODES+1 bits available.
+     */
+    assert(sizeof(backend->host_nodes) >=
+           BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long));
+    assert(maxnode <= MAX_NODES);

 #ifdef HAVE_NUMA_HAS_PREFERRED_MANY
-        if (mode == MPOL_PREFERRED && numa_has_preferred_many() > 0) {
-            /*
-             * Replace with MPOL_PREFERRED_MANY otherwise the mbind() below
-             * silently picks the first node.
-             */
-            mode = MPOL_PREFERRED_MANY;
-        }
+    if (mode == MPOL_PREFERRED && numa_has_preferred_many() > 0) {
+        /*
+         * Replace with MPOL_PREFERRED_MANY otherwise the mbind() below
+         * silently picks the first node.
+         */
+        mode = MPOL_PREFERRED_MANY;
+    }
 #endif

-        if (maxnode &&
-            mbind(ptr, sz, mode, backend->host_nodes, maxnode + 1, flags)) {
-            if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) {
-                error_setg_errno(errp, errno,
-                                 "cannot bind memory to host NUMA nodes");
-                return;
-            }
-        }
-#endif
-        /* Preallocate memory after the NUMA policy has been instantiated.
-         * This is necessary to guarantee memory is allocated with
-         * specified NUMA policy in place.
-         */
-        if (backend->prealloc) {
-            qemu_prealloc_mem(memory_region_get_fd(&backend->mr), ptr, sz,
-                              backend->prealloc_threads,
-                              backend->prealloc_context, &local_err);
-            if (local_err) {
-                goto out;
-            }
+    if (maxnode &&
+        mbind(ptr, sz, mode, backend->host_nodes, maxnode + 1, flags)) {
+        if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) {
+            error_setg_errno(errp, errno,
+                             "cannot bind memory to host NUMA nodes");
+            return;
        }
    }
-out:
-    error_propagate(errp, local_err);
+#endif
+    /* Preallocate memory after the NUMA policy has been instantiated.
+     * This is necessary to guarantee memory is allocated with
+     * specified NUMA policy in place.
+     */
+    if (backend->prealloc && !qemu_prealloc_mem(memory_region_get_fd(&backend->mr),
+                                                ptr, sz,
+                                                backend->prealloc_threads,
+                                                backend->prealloc_context, errp)) {
+        return;
+    }
 }

 static bool
--- a/block.c
+++ b/block.c
@@ -6344,7 +6344,7 @@ BlockDeviceInfoList *bdrv_named_nodes_list(bool flat,
    BlockDriverState *bs;

    GLOBAL_STATE_CODE();
-    GRAPH_RDLOCK_GUARD_MAINLOOP();
+    GRAPH_RDLOCK_GUARD();

    list = NULL;
    QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
--- a/block/blkio.c
+++ b/block/blkio.c
@@ -68,7 +68,7 @@ typedef struct {
    CoQueue bounce_available;

    /* The value of the "mem-region-alignment" property */
-    size_t mem_region_alignment;
+    uint64_t mem_region_alignment;

    /* Can we skip adding/deleting blkio_mem_regions? */
    bool needs_mem_regions;
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -226,6 +226,9 @@ typedef struct RawPosixAIOData {
        struct {
            unsigned long op;
        } zone_mgmt;
+        struct {
+            struct stat *st;
+        } fstat;
    };
 } RawPosixAIOData;

@@ -2613,6 +2616,34 @@ static void raw_close(BlockDriverState *bs)
    }
 }

+static int handle_aiocb_fstat(void *opaque)
+{
+    RawPosixAIOData *aiocb = opaque;
+
+    if (fstat(aiocb->aio_fildes, aiocb->fstat.st) < 0) {
+        return -errno;
+    }
+
+    return 0;
+}
+
+static int coroutine_fn raw_co_fstat(BlockDriverState *bs, struct stat *st)
+{
+    BDRVRawState *s = bs->opaque;
+    RawPosixAIOData acb;
+
+    acb = (RawPosixAIOData) {
+        .bs             = bs,
+        .aio_fildes     = s->fd,
+        .aio_type       = QEMU_AIO_FSTAT,
+        .fstat          = {
+            .st = st,
+        },
+    };
+
+    return raw_thread_pool_submit(handle_aiocb_fstat, &acb);
+}
+
 /**
 * Truncates the given regular file @fd to @offset and, when growing, fills the
 * new space according to @prealloc.
@@ -2857,11 +2888,14 @@ static int64_t coroutine_fn raw_co_getlength(BlockDriverState *bs)
 static int64_t coroutine_fn raw_co_get_allocated_file_size(BlockDriverState *bs)
 {
    struct stat st;
-    BDRVRawState *s = bs->opaque;
+    int ret;

-    if (fstat(s->fd, &st) < 0) {
-        return -errno;
+    ret = raw_co_fstat(bs, &st);
+
+    if (ret) {
+        return ret;
    }
+
    return (int64_t)st.st_blocks * 512;
 }

--- a/block/meson.build
+++ b/block/meson.build
@@ -149,6 +149,7 @@ block_gen_c = custom_target('block-gen.c',
                                      '../include/block/dirty-bitmap.h',
                                      '../include/block/block_int-io.h',
                                      '../include/block/block-global-state.h',
+                                      '../include/block/qapi.h',
                                      '../include/sysemu/block-backend-global-state.h',
                                      '../include/sysemu/block-backend-io.h',
                                      'coroutines.h'
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -400,7 +400,7 @@ void hmp_nbd_server_start(Monitor *mon, const QDict *qdict)
    bool writable = qdict_get_try_bool(qdict, "writable", false);
    bool all = qdict_get_try_bool(qdict, "all", false);
    Error *local_err = NULL;
-    BlockInfoList *block_list, *info;
+    BlockBackend *blk;
    SocketAddress *addr;
    NbdServerAddOptions export;

@@ -425,18 +425,24 @@ void hmp_nbd_server_start(Monitor *mon, const QDict *qdict)
        return;
    }

-    /* Then try adding all block devices.  If one fails, close all and
+    /*
+     * Then try adding all block devices.  If one fails, close all and
     * exit.
     */
-    block_list = qmp_query_block(NULL);
+    for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) {
+        BlockDriverState *bs = blk_bs(blk);

-    for (info = block_list; info; info = info->next) {
-        if (!info->value->inserted) {
+        if (!*blk_name(blk) && !blk_get_attached_dev(blk)) {
+            continue;
+        }
+
+        bs = bdrv_skip_implicit_filters(bs);
+        if (!bs || !bs->drv) {
            continue;
        }

        export = (NbdServerAddOptions) {
-            .device         = info->value->device,
+            .device         = g_strdup(blk_name(blk)),
            .has_writable   = true,
            .writable       = writable,
        };
@@ -449,8 +455,6 @@ void hmp_nbd_server_start(Monitor *mon, const QDict *qdict)
        }
    }

-    qapi_free_BlockInfoList(block_list);
-
 exit:
    hmp_handle_error(mon, local_err);
 }
@@ -744,7 +748,7 @@ static void print_block_info(Monitor *mon, BlockInfo *info,
    }
 }

-void hmp_info_block(Monitor *mon, const QDict *qdict)
+void coroutine_fn hmp_info_block(Monitor *mon, const QDict *qdict)
 {
    BlockInfoList *block_list, *info;
    BlockDeviceInfoList *blockdev_list, *blockdev;
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -41,10 +41,10 @@
 #include "qemu/qemu-print.h"
 #include "sysemu/block-backend.h"

-BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
-                                        BlockDriverState *bs,
-                                        bool flat,
-                                        Error **errp)
+BlockDeviceInfo *coroutine_fn bdrv_block_device_info(BlockBackend *blk,
+                                                     BlockDriverState *bs,
+                                                     bool flat,
+                                                     Error **errp)
 {
    ImageInfo **p_image_info;
    ImageInfo *backing_info;
@@ -234,8 +234,6 @@ bdrv_do_query_node_info(BlockDriverState *bs, BlockNodeInfo *info, Error **errp)
    int ret;
    Error *err = NULL;

-    aio_context_acquire(bdrv_get_aio_context(bs));
-
    size = bdrv_getlength(bs);
    if (size < 0) {
        error_setg_errno(errp, -size, "Can't get image size '%s'",
@@ -248,7 +246,9 @@ bdrv_do_query_node_info(BlockDriverState *bs, BlockNodeInfo *info, Error **errp)
    info->filename        = g_strdup(bs->filename);
    info->format          = g_strdup(bdrv_get_format_name(bs));
    info->virtual_size    = size;
-    info->actual_size     = bdrv_get_allocated_file_size(bs);
+    bdrv_graph_co_rdlock();
+    info->actual_size     = bdrv_co_get_allocated_file_size(bs);
+    bdrv_graph_co_rdunlock();
    info->has_actual_size = info->actual_size >= 0;
    if (bs->encrypted) {
        info->encrypted = true;
@@ -304,7 +304,7 @@ bdrv_do_query_node_info(BlockDriverState *bs, BlockNodeInfo *info, Error **errp)
    }

 out:
-    aio_context_release(bdrv_get_aio_context(bs));
+    return;
 }

 /**
@@ -374,7 +374,7 @@ fail:
 }

 /**
- * bdrv_query_block_graph_info:
+ * bdrv_co_query_block_graph_info:
 * @bs: root node to start from
 * @p_info: location to store image information
 * @errp: location to store error information
@@ -383,15 +383,17 @@ fail:
 *
 * @p_info will be set only on success. On error, store error in @errp.
 */
-void bdrv_query_block_graph_info(BlockDriverState *bs,
-                                 BlockGraphInfo **p_info,
-                                 Error **errp)
+void coroutine_fn bdrv_co_query_block_graph_info(BlockDriverState *bs,
+                                                 BlockGraphInfo **p_info,
+                                                 Error **errp)
 {
    BlockGraphInfo *info;
    BlockChildInfoList **children_list_tail;
    BdrvChild *c;
    ERRP_GUARD();

+    assert_bdrv_graph_readable();
+
    info = g_new0(BlockGraphInfo, 1);
    bdrv_do_query_node_info(bs, qapi_BlockGraphInfo_base(info), errp);
    if (*errp) {
@@ -407,7 +409,7 @@ void bdrv_query_block_graph_info(BlockDriverState *bs,
        QAPI_LIST_APPEND(children_list_tail, c_info);

        c_info->name = g_strdup(c->name);
-        bdrv_query_block_graph_info(c->bs, &c_info->info, errp);
+        bdrv_co_query_block_graph_info(c->bs, &c_info->info, errp);
        if (*errp) {
            goto fail;
        }
@@ -665,13 +667,13 @@ bdrv_query_bds_stats(BlockDriverState *bs, bool blk_level)
    return s;
 }

-BlockInfoList *qmp_query_block(Error **errp)
+BlockInfoList *coroutine_fn qmp_query_block(Error **errp)
 {
    BlockInfoList *head = NULL, **p_next = &head;
    BlockBackend *blk;
    Error *local_err = NULL;

-    GRAPH_RDLOCK_GUARD_MAINLOOP();
+    GRAPH_RDLOCK_GUARD();

    for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) {
        BlockInfoList *info;
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -389,7 +389,7 @@ int bdrv_snapshot_list(BlockDriverState *bs,
                       QEMUSnapshotInfo **psn_info)
 {
    GLOBAL_STATE_CODE();
-    GRAPH_RDLOCK_GUARD_MAINLOOP();
+    GRAPH_RDLOCK_GUARD();

    BlockDriver *drv = bs->drv;
    BlockDriverState *fallback_bs = bdrv_snapshot_fallback(bs);
--- a/blockdev.c
+++ b/blockdev.c
@@ -2871,9 +2871,9 @@ void qmp_drive_backup(DriveBackup *backup, Error **errp)
    blockdev_do_action(&action, errp);
 }

-BlockDeviceInfoList *qmp_query_named_block_nodes(bool has_flat,
-                                                 bool flat,
-                                                 Error **errp)
+BlockDeviceInfoList *coroutine_fn qmp_query_named_block_nodes(bool has_flat,
+                                                              bool flat,
+                                                              Error **errp)
 {
    bool return_flat = has_flat && flat;

--- a/chardev/char-fe.c
+++ b/chardev/char-fe.c
@@ -21,6 +21,7 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
+#define HW_POISON_H /* avoid poison since we patch against rules it "enforces" */
 #include "qemu/osdep.h"
 #include "qemu/error-report.h"
 #include "qapi/error.h"
--- a/chardev/char-mux.c
+++ b/chardev/char-mux.c
@@ -22,6 +22,7 @@
 * THE SOFTWARE.
 */

+#define HW_POISON_H /* avoid poison since we patch against rules it "enforces" */
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qemu/module.h"
@@ -198,6 +199,17 @@ static void mux_chr_accept_input(Chardev *chr)
        be->chr_read(be->opaque,
                     &d->buffer[m][d->cons[m]++ & MUX_BUFFER_MASK], 1);
    }
+
+#if defined(TARGET_S390X)
+    /*
+     * We're still not able to sync producer and consumer, so let's wait a bit
+     *  and try again by then.
+     */
+    if (d->prod[m] != d->cons[m]) {
+        qemu_mod_timer(d->accept_timer, qemu_get_clock_ns(vm_clock)
+                                        + (int64_t)100000);
+    }
+#endif
 }

 static int mux_chr_can_read(void *opaque)
@@ -332,6 +344,10 @@ static void qemu_chr_open_mux(Chardev *chr,
    }

    d->focus = -1;
+#if defined(TARGET_S390X)
+    d->accept_timer = qemu_new_timer_ns(vm_clock,
+                                     (QEMUTimerCB *)mux_chr_accept_input, chr);
+#endif
    /* only default to opened state if we've realized the initial
     * set of muxes
     */
--- a/chardev/char-socket.c
+++ b/chardev/char-socket.c
@@ -492,9 +492,9 @@ static gboolean tcp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
        s->max_size <= 0) {
        return TRUE;
    }
-    len = sizeof(buf);
-    if (len > s->max_size) {
-        len = s->max_size;
+    len = tcp_chr_read_poll(opaque);
+    if (len > sizeof(buf)) {
+        len = sizeof(buf);
    }
    size = tcp_chr_recv(chr, (void *)buf, len);
    if (size == 0 || (size == -1 && errno != EAGAIN)) {
--- a/chardev/char.c
+++ b/chardev/char.c
@@ -22,6 +22,7 @@
 * THE SOFTWARE.
 */

+#define HW_POISON_H /* avoid poison since we patch against rules it "enforces" */
 #include "qemu/osdep.h"
 #include "qemu/cutils.h"
 #include "monitor/monitor.h"
--- a/chardev/chardev-internal.h
+++ b/chardev/chardev-internal.h
@@ -37,6 +37,9 @@ struct MuxChardev {
    Chardev parent;
    CharBackend *backends[MAX_MUX];
    CharBackend chr;
+#if defined(TARGET_S390X)
+    QEMUTimer *accept_timer;
+#endif
    int focus;
    int mux_cnt;
    int term_got_escape;
--- a/configs/devices/i386-softmmu/default.mak
+++ b/configs/devices/i386-softmmu/default.mak
@@ -18,6 +18,7 @@
 #CONFIG_QXL=n
 #CONFIG_SEV=n
 #CONFIG_SGA=n
+#CONFIG_TDX=n
 #CONFIG_TEST_DEVICES=n
 #CONFIG_TPM_CRB=n
 #CONFIG_TPM_TIS_ISA=n
--- a/3
+++ b/3
@@ -1675,6 +1675,9 @@ fi
 mkdir -p tests/tcg
 echo "# Automatically generated by configure - do not modify" > $config_host_mak
 echo "SRC_PATH=$source_path" >> $config_host_mak
+if test "$plugins" = "yes" ; then
+    echo "CONFIG_PLUGIN=y" >> tests/tcg/$config_host_mak
+fi

 tcg_tests_targets=
 for target in $target_list; do
--- a/contrib/ivshmem-client/meson.build
+++ b/contrib/ivshmem-client/meson.build
@@ -1,4 +1,4 @@
 executable('ivshmem-client', files('ivshmem-client.c', 'main.c'), genh,
           dependencies: glib,
           build_by_default: targetos == 'linux',
-           install: false)
+           install: true)
--- a/contrib/ivshmem-server/meson.build
+++ b/contrib/ivshmem-server/meson.build
@@ -1,4 +1,4 @@
 executable('ivshmem-server', files('ivshmem-server.c', 'main.c'), genh,
           dependencies: [qemuutil, rt],
           build_by_default: targetos == 'linux',
-           install: false)
+           install: true)
--- a/contrib/vhost-user-gpu/virgl.c
+++ b/contrib/vhost-user-gpu/virgl.c
@@ -327,7 +327,7 @@ virgl_get_resource_info_modifiers(uint32_t resource_id,
 #ifdef VIRGL_RENDERER_RESOURCE_INFO_EXT_VERSION
    struct virgl_renderer_resource_info_ext info_ext;
    ret = virgl_renderer_resource_get_info_ext(resource_id, &info_ext);
-    if (ret < 0) {
+    if (ret) {
        return ret;
    }

@@ -335,7 +335,7 @@ virgl_get_resource_info_modifiers(uint32_t resource_id,
    *modifiers = info_ext.modifiers;
 #else
    ret = virgl_renderer_resource_get_info(resource_id, info);
-    if (ret < 0) {
+    if (ret) {
        return ret;
    }

@@ -372,7 +372,7 @@ virgl_cmd_set_scanout(VuGpu *g,
        uint64_t modifiers = 0;
        ret = virgl_get_resource_info_modifiers(ss.resource_id, &info,
                                                &modifiers);
-        if (ret == -1) {
+        if (ret) {
            g_critical("%s: illegal resource specified %d\n",
                       __func__, ss.resource_id);
            cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
--- a/docs/interop/vhost-user.rst
+++ b/docs/interop/vhost-user.rst
@@ -148,9 +148,9 @@ Vring descriptor indices for packed virtqueues
 A vring address description
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^

-+-------+-------+------+------------+------+-----------+-----+
-| index | flags | size | descriptor | used | available | log |
-+-------+-------+------+------------+------+-----------+-----+
+-------+-------+------------+------+-----------+-----+
+| index | flags | descriptor | used | available | log |
+-------+-------+------------+------+-----------+-----+

 :index: a 32-bit vring index

--- a/docs/meson.build
+++ b/docs/meson.build
@@ -13,12 +13,12 @@ if sphinx_build.found()
  sphinx_version = run_command(SPHINX_ARGS + ['--version'],
                               check: true).stdout().split()[1]
  if sphinx_version.version_compare('>=1.7.0')
-    SPHINX_ARGS += ['-j', 'auto']
+    SPHINX_ARGS += ['-j', '1']
  else
    nproc = find_program('nproc')
    if nproc.found()
      jobs = run_command(nproc, check: true).stdout()
-      SPHINX_ARGS += ['-j', jobs]
+      SPHINX_ARGS += ['-j', '1']
    endif
  endif

--- a/docs/system/confidential-guest-support.rst
+++ b/docs/system/confidential-guest-support.rst
@@ -38,6 +38,7 @@ Supported mechanisms
 Currently supported confidential guest mechanisms are:

 * AMD Secure Encrypted Virtualization (SEV) (see :doc:`i386/amd-memory-encryption`)
+* Intel Trust Domain Extension (TDX) (see :doc:`i386/tdx`)
 * POWER Protected Execution Facility (PEF) (see :ref:`power-papr-protected-execution-facility-pef`)
 * s390x Protected Virtualization (PV) (see :doc:`s390x/protvirt`)

--- a/docs/system/i386/tdx.rst
+++ b/docs/system/i386/tdx.rst
@@ -0,0 +1,143 @@
+Intel Trusted Domain eXtension (TDX)
+====================================
+
+Intel Trusted Domain eXtensions (TDX) refers to an Intel technology that extends
+Virtual Machine Extensions (VMX) and Multi-Key Total Memory Encryption (MKTME)
+with a new kind of virtual machine guest called a Trust Domain (TD). A TD runs
+in a CPU mode that is designed to protect the confidentiality of its memory
+contents and its CPU state from any other software, including the hosting
+Virtual Machine Monitor (VMM), unless explicitly shared by the TD itself.
+
+Prerequisites
+-------------
+
+To run TD, the physical machine needs to have TDX module loaded and initialized
+while KVM hypervisor has TDX support and has TDX enabled. If those requirements
+are met, the ``KVM_CAP_VM_TYPES`` will report the support of ``KVM_X86_TDX_VM``.
+
+Trust Domain Virtual Firmware (TDVF)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Trust Domain Virtual Firmware (TDVF) is required to provide TD services to boot
+TD Guest OS. TDVF needs to be copied to guest private memory and measured before
+the TD boots.
+
+KVM vcpu ioctl ``KVM_MEMORY_MAPPING`` can be used to populates the TDVF content
+into its private memory.
+
+Since TDX doesn't support readonly memslot, TDVF cannot be mapped as pflash
+device and it actually works as RAM. "-bios" option is chosen to load TDVF.
+
+OVMF is the opensource firmware that implements the TDVF support. Thus the
+command line to specify and load TDVF is ``-bios OVMF.fd``
+
+KVM private memory
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+TD's memory (RAM) needs to be able to be transformed between private and shared.
+Its BIOS (OVMF/TDVF) needs to be mapped as private as well. Thus QEMU needs to
+allocate private guest memfd for them via KVM's IOCTL (KVM_CREATE_GUEST_MEMFD),
+which requires KVM is newer enough that reports KVM_CAP_GUEST_MEMFD.
+
+Feature Control
+---------------
+
+Unlike non-TDX VM, the CPU features (enumerated by CPU or MSR) of a TD is not
+under full control of VMM. VMM can only configure part of features of a TD on
+``KVM_TDX_INIT_VM`` command of VM scope ``MEMORY_ENCRYPT_OP`` ioctl.
+
+The configurable features have three types:
+
+- Attributes:
+  - PKS (bit 30) controls whether Supervisor Protection Keys is exposed to TD,
+  which determines related CPUID bit and CR4 bit;
+  - PERFMON (bit 63) controls whether PMU is exposed to TD.
+
+- XSAVE related features (XFAM):
+  XFAM is a 64b mask, which has the same format as XCR0 or IA32_XSS MSR. It
+  determines the set of extended features available for use by the guest TD.
+
+- CPUID features:
+  Only some bits of some CPUID leaves are directly configurable by VMM.
+
+What features can be configured is reported via TDX capabilities.
+
+TDX capabilities
+~~~~~~~~~~~~~~~~
+
+The VM scope ``MEMORY_ENCRYPT_OP`` ioctl provides command ``KVM_TDX_CAPABILITIES``
+to get the TDX capabilities from KVM. It returns a data structure of
+``struct kvm_tdx_capabilites``, which tells the supported configuration of
+attributes, XFAM and CPUIDs.
+
+TD attestation
+--------------
+
+In TD guest, the attestation process is used to verify the TDX guest
+trustworthiness to other entities before provisioning secrets to the guest.
+
+TD attestation is initiated first by calling TDG.MR.REPORT inside TD to get the
+REPORT. Then the REPORT data needs to be converted into a remotely verifiable
+Quote by SGX Quoting Enclave (QE).
+
+A host daemon, Quote Generation Service (QGS), provides the functionality of
+SGX GE. It provides a socket address, to which a TD guest can connect via
+"quote-generation-socket" property. On the request of <GETQUOTE> from TD guest,
+QEMU sends the TDREPORT to QGS via "quote-generation-socket" socket, and gets
+the returning Quoting and return it back to TD guest.
+
+Though "quote-generation-socket" is optional for booting the TD guest, it's a
+must for supporting TD guest atteatation.
+
+Launching a TD (TDX VM)
+-----------------------
+
+To launch a TDX guest, below are new added and required:
+
+.. parsed-literal::
+
+    |qemu_system_x86| \\
+        -object tdx-guest,id=tdx0 \\
+        -machine ...,kernel-irqchip=split,confidential-guest-support=tdx0 \\
+        -bios OVMF.fd \\
+
+If TD attestation support is wanted:
+
+.. parsed-literal::
+
+    |qemu_system_x86| \\
+        -object '{"qom-type":"tdx-guest","id":"tdx0","quote-generation-socket":{"type": "vsock", "cid":"1","port":"1234"}}' \\
+        -machine ...,kernel-irqchip=split,confidential-guest-support=tdx0 \\
+        -bios OVMF.fd \\
+
+Debugging
+---------
+
+Bit 0 of TD attributes, is DEBUG bit, which decides if the TD runs in off-TD
+debug mode. When in off-TD debug mode, TD's VCPU state and private memory are
+accessible via given SEAMCALLs. This requires KVM to expose APIs to invoke those
+SEAMCALLs and resonponding QEMU change.
+
+It's targeted as future work.
+
+restrictions
+------------
+
+ - kernel-irqchip must be split;
+
+ - No readonly support for private memory;
+
+ - No SMM support: SMM support requires manipulating the guset register states
+   which is not allowed;
+
+Live Migration
+--------------
+
+TODO
+
+References
+----------
+
+- `TDX Homepage <https://www.intel.com/content/www/us/en/developer/articles/technical/intel-trust-domain-extensions.html>`__
+
+- `SGX QE <https://github.com/intel/SGXDataCenterAttestationPrimitives/tree/master/QuoteGeneration>`__
--- a/docs/system/keys.rst.inc
+++ b/docs/system/keys.rst.inc
@@ -1,8 +1,9 @@
-During the graphical emulation, you can use special key combinations to
-change modes. The default key mappings are shown below, but if you use
-``-alt-grab`` then the modifier is Ctrl-Alt-Shift (instead of Ctrl-Alt)
-and if you use ``-ctrl-grab`` then the modifier is the right Ctrl key
-(instead of Ctrl-Alt):
+During the graphical emulation, you can use special key combinations from
+the following table to change modes. By default the modifier is Ctrl-Alt
+(used in the table below) which can be changed with ``-display`` suboption
+``mod=`` where appropriate. For example, ``-display sdl,
+grab-mod=lshift-lctrl-lalt`` changes the modifier key to Ctrl-Alt-Shift,
+while ``-display sdl,grab-mod=rctrl`` changes it to the right Ctrl key.

 Ctrl-Alt-f
   Toggle full screen
@@ -28,7 +29,7 @@ Ctrl-Alt-n
   *3*
      Serial port

-Ctrl-Alt
+Ctrl-Alt-g
   Toggle mouse and keyboard grab.

 In the virtual consoles, you can use Ctrl-Up, Ctrl-Down, Ctrl-PageUp and
--- a/docs/system/target-i386.rst
+++ b/docs/system/target-i386.rst
@@ -29,6 +29,7 @@ Architectural features
   i386/kvm-pv
   i386/sgx
   i386/amd-memory-encryption
+   i386/tdx

 OS requirements
 ~~~~~~~~~~~~~~~
--- a/hmp-commands-info.hx
+++ b/hmp-commands-info.hx
@@ -65,6 +65,7 @@ ERST
        .help       = "show info of one block device or all block devices "
                      "(-n: show named nodes; -v: show details)",
        .cmd        = hmp_info_block,
+        .coroutine  = true,
    },

 SRST
--- a/hw/arm/aspeed_ast2400.c
+++ b/hw/arm/aspeed_ast2400.c
@@ -247,7 +247,6 @@ static void aspeed_ast2400_soc_realize(DeviceState *dev, Error **errp)
    Aspeed2400SoCState *a = ASPEED2400_SOC(dev);
    AspeedSoCState *s = ASPEED_SOC(dev);
    AspeedSoCClass *sc = ASPEED_SOC_GET_CLASS(s);
-    Error *err = NULL;
    g_autofree char *sram_name = NULL;

    /* Default boot region (SPI memory or ROMs) */
@@ -276,9 +275,8 @@ static void aspeed_ast2400_soc_realize(DeviceState *dev, Error **errp)

    /* SRAM */
    sram_name = g_strdup_printf("aspeed.sram.%d", CPU(&a->cpu[0])->cpu_index);
-    memory_region_init_ram(&s->sram, OBJECT(s), sram_name, sc->sram_size, &err);
-    if (err) {
-        error_propagate(errp, err);
+    if (!memory_region_init_ram(&s->sram, OBJECT(s), sram_name, sc->sram_size,
+                                errp)) {
        return;
    }
    memory_region_add_subregion(s->memory,
--- a/hw/arm/aspeed_ast2600.c
+++ b/hw/arm/aspeed_ast2600.c
@@ -282,7 +282,6 @@ static void aspeed_soc_ast2600_realize(DeviceState *dev, Error **errp)
    Aspeed2600SoCState *a = ASPEED2600_SOC(dev);
    AspeedSoCState *s = ASPEED_SOC(dev);
    AspeedSoCClass *sc = ASPEED_SOC_GET_CLASS(s);
-    Error *err = NULL;
    qemu_irq irq;
    g_autofree char *sram_name = NULL;

@@ -355,9 +354,8 @@ static void aspeed_soc_ast2600_realize(DeviceState *dev, Error **errp)

    /* SRAM */
    sram_name = g_strdup_printf("aspeed.sram.%d", CPU(&a->cpu[0])->cpu_index);
-    memory_region_init_ram(&s->sram, OBJECT(s), sram_name, sc->sram_size, &err);
-    if (err) {
-        error_propagate(errp, err);
+    if (!memory_region_init_ram(&s->sram, OBJECT(s), sram_name, sc->sram_size,
+                                errp)) {
        return;
    }
    memory_region_add_subregion(s->memory,
--- a/hw/arm/fsl-imx25.c
+++ b/hw/arm/fsl-imx25.c
@@ -81,7 +81,6 @@ static void fsl_imx25_realize(DeviceState *dev, Error **errp)
 {
    FslIMX25State *s = FSL_IMX25(dev);
    uint8_t i;
-    Error *err = NULL;

    if (!qdev_realize(DEVICE(&s->cpu), NULL, errp)) {
        return;
@@ -281,28 +280,22 @@ static void fsl_imx25_realize(DeviceState *dev, Error **errp)
                                                       FSL_IMX25_WDT_IRQ));

    /* initialize 2 x 16 KB ROM */
-    memory_region_init_rom(&s->rom[0], OBJECT(dev), "imx25.rom0",
-                           FSL_IMX25_ROM0_SIZE, &err);
-    if (err) {
-        error_propagate(errp, err);
+    if (!memory_region_init_rom(&s->rom[0], OBJECT(dev), "imx25.rom0",
+                                FSL_IMX25_ROM0_SIZE, errp)) {
        return;
    }
    memory_region_add_subregion(get_system_memory(), FSL_IMX25_ROM0_ADDR,
                                &s->rom[0]);
-    memory_region_init_rom(&s->rom[1], OBJECT(dev), "imx25.rom1",
-                           FSL_IMX25_ROM1_SIZE, &err);
-    if (err) {
-        error_propagate(errp, err);
+    if (!memory_region_init_rom(&s->rom[1], OBJECT(dev), "imx25.rom1",
+                                FSL_IMX25_ROM1_SIZE, errp)) {
        return;
    }
    memory_region_add_subregion(get_system_memory(), FSL_IMX25_ROM1_ADDR,
                                &s->rom[1]);

    /* initialize internal RAM (128 KB) */
-    memory_region_init_ram(&s->iram, NULL, "imx25.iram", FSL_IMX25_IRAM_SIZE,
-                           &err);
-    if (err) {
-        error_propagate(errp, err);
+    if (!memory_region_init_ram(&s->iram, NULL, "imx25.iram",
+                                FSL_IMX25_IRAM_SIZE, errp)) {
        return;
    }
    memory_region_add_subregion(get_system_memory(), FSL_IMX25_IRAM_ADDR,
--- a/hw/arm/fsl-imx31.c
+++ b/hw/arm/fsl-imx31.c
@@ -63,7 +63,6 @@ static void fsl_imx31_realize(DeviceState *dev, Error **errp)
 {
    FslIMX31State *s = FSL_IMX31(dev);
    uint16_t i;
-    Error *err = NULL;

    if (!qdev_realize(DEVICE(&s->cpu), NULL, errp)) {
        return;
@@ -188,30 +187,24 @@ static void fsl_imx31_realize(DeviceState *dev, Error **errp)
    sysbus_mmio_map(SYS_BUS_DEVICE(&s->wdt), 0, FSL_IMX31_WDT_ADDR);

    /* On a real system, the first 16k is a `secure boot rom' */
-    memory_region_init_rom(&s->secure_rom, OBJECT(dev), "imx31.secure_rom",
-                           FSL_IMX31_SECURE_ROM_SIZE, &err);
-    if (err) {
-        error_propagate(errp, err);
+    if (!memory_region_init_rom(&s->secure_rom, OBJECT(dev), "imx31.secure_rom",
+                                FSL_IMX31_SECURE_ROM_SIZE, errp)) {
        return;
    }
    memory_region_add_subregion(get_system_memory(), FSL_IMX31_SECURE_ROM_ADDR,
                                &s->secure_rom);

    /* There is also a 16k ROM */
-    memory_region_init_rom(&s->rom, OBJECT(dev), "imx31.rom",
-                           FSL_IMX31_ROM_SIZE, &err);
-    if (err) {
-        error_propagate(errp, err);
+    if (!memory_region_init_rom(&s->rom, OBJECT(dev), "imx31.rom",
+                                FSL_IMX31_ROM_SIZE, errp)) {
        return;
    }
    memory_region_add_subregion(get_system_memory(), FSL_IMX31_ROM_ADDR,
                                &s->rom);

    /* initialize internal RAM (16 KB) */
-    memory_region_init_ram(&s->iram, NULL, "imx31.iram", FSL_IMX31_IRAM_SIZE,
-                           &err);
-    if (err) {
-        error_propagate(errp, err);
+    if (!memory_region_init_ram(&s->iram, NULL, "imx31.iram",
+                                FSL_IMX31_IRAM_SIZE, errp)) {
        return;
    }
    memory_region_add_subregion(get_system_memory(), FSL_IMX31_IRAM_ADDR,
--- a/hw/arm/fsl-imx6.c
+++ b/hw/arm/fsl-imx6.c
@@ -109,7 +109,6 @@ static void fsl_imx6_realize(DeviceState *dev, Error **errp)
    MachineState *ms = MACHINE(qdev_get_machine());
    FslIMX6State *s = FSL_IMX6(dev);
    uint16_t i;
-    Error *err = NULL;
    unsigned int smp_cpus = ms->smp.cpus;

    if (smp_cpus > FSL_IMX6_NUM_CPUS) {
@@ -423,30 +422,24 @@ static void fsl_imx6_realize(DeviceState *dev, Error **errp)
    }

    /* ROM memory */
-    memory_region_init_rom(&s->rom, OBJECT(dev), "imx6.rom",
-                           FSL_IMX6_ROM_SIZE, &err);
-    if (err) {
-        error_propagate(errp, err);
+    if (!memory_region_init_rom(&s->rom, OBJECT(dev), "imx6.rom",
+                                FSL_IMX6_ROM_SIZE, errp)) {
        return;
    }
    memory_region_add_subregion(get_system_memory(), FSL_IMX6_ROM_ADDR,
                                &s->rom);

    /* CAAM memory */
-    memory_region_init_rom(&s->caam, OBJECT(dev), "imx6.caam",
-                           FSL_IMX6_CAAM_MEM_SIZE, &err);
-    if (err) {
-        error_propagate(errp, err);
+    if (!memory_region_init_rom(&s->caam, OBJECT(dev), "imx6.caam",
+                                FSL_IMX6_CAAM_MEM_SIZE, errp)) {
        return;
    }
    memory_region_add_subregion(get_system_memory(), FSL_IMX6_CAAM_MEM_ADDR,
                                &s->caam);

    /* OCRAM memory */
-    memory_region_init_ram(&s->ocram, NULL, "imx6.ocram", FSL_IMX6_OCRAM_SIZE,
-                           &err);
-    if (err) {
-        error_propagate(errp, err);
+    if (!memory_region_init_ram(&s->ocram, NULL, "imx6.ocram",
+                                FSL_IMX6_OCRAM_SIZE, errp)) {
        return;
    }
    memory_region_add_subregion(get_system_memory(), FSL_IMX6_OCRAM_ADDR,
--- a/hw/arm/integratorcp.c
+++ b/hw/arm/integratorcp.c
@@ -291,12 +291,9 @@ static void integratorcm_realize(DeviceState *d, Error **errp)
 {
    IntegratorCMState *s = INTEGRATOR_CM(d);
    SysBusDevice *dev = SYS_BUS_DEVICE(d);
-    Error *local_err = NULL;

-    memory_region_init_ram(&s->flash, OBJECT(d), "integrator.flash", 0x100000,
-                           &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
+    if (!memory_region_init_ram(&s->flash, OBJECT(d), "integrator.flash",
+                                0x100000, errp)) {
        return;
    }

--- a/hw/arm/nrf51_soc.c
+++ b/hw/arm/nrf51_soc.c
@@ -58,7 +58,6 @@ static void nrf51_soc_realize(DeviceState *dev_soc, Error **errp)
 {
    NRF51State *s = NRF51_SOC(dev_soc);
    MemoryRegion *mr;
-    Error *err = NULL;
    uint8_t i = 0;
    hwaddr base_addr = 0;

@@ -92,10 +91,8 @@ static void nrf51_soc_realize(DeviceState *dev_soc, Error **errp)

    memory_region_add_subregion_overlap(&s->container, 0, s->board_memory, -1);

-    memory_region_init_ram(&s->sram, OBJECT(s), "nrf51.sram", s->sram_size,
-                           &err);
-    if (err) {
-        error_propagate(errp, err);
+    if (!memory_region_init_ram(&s->sram, OBJECT(s), "nrf51.sram", s->sram_size,
+                                errp)) {
        return;
    }
    memory_region_add_subregion(&s->container, NRF51_SRAM_BASE, &s->sram);
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -675,6 +675,8 @@ static void smmu_base_reset_hold(Object *obj)
 {
    SMMUState *s = ARM_SMMU(obj);

+    memset(s->smmu_pcibus_by_bus_num, 0, sizeof(s->smmu_pcibus_by_bus_num));
+
    g_hash_table_remove_all(s->configs);
    g_hash_table_remove_all(s->iotlb);
 }
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -65,7 +65,7 @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status)
    iov_discard_undo(&req->inhdr_undo);
    iov_discard_undo(&req->outhdr_undo);
    virtqueue_push(req->vq, &req->elem, req->in_len);
-    if (s->dataplane_started && !s->dataplane_disabled) {
+    if (qemu_in_iothread()) {
        virtio_blk_data_plane_notify(s->dataplane, req->vq);
    } else {
        virtio_notify(vdev, req->vq);
--- a/hw/block/xen-block.c
+++ b/hw/block/xen-block.c
@@ -418,6 +418,9 @@ static void xen_block_realize(XenDevice *xendev, Error **errp)

    xen_block_set_size(blockdev);

+    if (!monitor_add_blk(conf->blk, blockdev->drive->id, errp)) {
+        return;
+    }
    blockdev->dataplane =
        xen_block_dataplane_create(xendev, blk, conf->logical_block_size,
                                   blockdev->props.iothread);
@@ -874,6 +877,8 @@ static XenBlockDrive *xen_block_drive_create(const char *id,
    const char *mode = qdict_get_try_str(opts, "mode");
    const char *direct_io_safe = qdict_get_try_str(opts, "direct-io-safe");
    const char *discard_enable = qdict_get_try_str(opts, "discard-enable");
+    const char *suse_diskcache_disable_flush = qdict_get_try_str(opts,
+                                               "suse-diskcache-disable-flush");
    char *driver = NULL;
    char *filename = NULL;
    XenBlockDrive *drive = NULL;
@@ -954,6 +959,16 @@ static XenBlockDrive *xen_block_drive_create(const char *id,
        }
    }

+    if (suse_diskcache_disable_flush) {
+        unsigned long value;
+        if (!qemu_strtoul(suse_diskcache_disable_flush, NULL, 2, &value) && !!value) {
+            QDict *cache_qdict = qdict_new();
+
+            qdict_put_bool(cache_qdict, "no-flush", true);
+            qdict_put_obj(file_layer, "cache", QOBJECT(cache_qdict));
+        }
+    }
+
    /*
     * It is necessary to turn file locking off as an emulated device
     * may have already opened the same image file.
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -1189,6 +1189,11 @@ bool machine_mem_merge(MachineState *machine)
    return machine->mem_merge;
 }

+bool machine_require_guest_memfd(MachineState *machine)
+{
+    return machine->require_guest_memfd;
+}
+
 static char *cpu_slot_to_string(const CPUArchId *cpu)
 {
    GString *s = g_string_new(NULL);
--- a/hw/cxl/cxl-cdat.c
+++ b/hw/cxl/cxl-cdat.c
@@ -49,6 +49,7 @@ static void ct3_build_cdat(CDATObject *cdat, Error **errp)
    g_autofree CDATTableHeader *cdat_header = NULL;
    g_autofree CDATEntry *cdat_st = NULL;
    uint8_t sum = 0;
+    uint8_t *hdr_buf;
    int ent, i;

    /* Use default table if fopen == NULL */
@@ -63,7 +64,7 @@ static void ct3_build_cdat(CDATObject *cdat, Error **errp)
    cdat->built_buf_len = cdat->build_cdat_table(&cdat->built_buf,
                                                 cdat->private);

-    if (!cdat->built_buf_len) {
+    if (cdat->built_buf_len <= 0) {
        /* Build later as not all data available yet */
        cdat->to_update = true;
        return;
@@ -95,8 +96,12 @@ static void ct3_build_cdat(CDATObject *cdat, Error **errp)
    /* For now, no runtime updates */
    cdat_header->sequence = 0;
    cdat_header->length += sizeof(CDATTableHeader);
-    sum += cdat_header->revision + cdat_header->sequence +
-        cdat_header->length;
+
+    hdr_buf = (uint8_t *)cdat_header;
+    for (i = 0; i < sizeof(*cdat_header); i++) {
+        sum += hdr_buf[i];
+    }
+
    /* Sum of all bytes including checksum must be 0 */
    cdat_header->checksum = ~sum + 1;

--- a/hw/cxl/cxl-component-utils.c
+++ b/hw/cxl/cxl-component-utils.c
@@ -199,7 +199,7 @@ void cxl_component_register_block_init(Object *obj,
    /* io registers controls link which we don't care about in QEMU */
    memory_region_init_io(&cregs->io, obj, NULL, cregs, ".io",
                          CXL2_COMPONENT_IO_REGION_SIZE);
-    memory_region_init_io(&cregs->cache_mem, obj, &cache_mem_ops, cregs,
+    memory_region_init_io(&cregs->cache_mem, obj, &cache_mem_ops, cxl_cstate,
                          ".cache_mem", CXL2_COMPONENT_CM_REGION_SIZE);

    memory_region_add_subregion(&cregs->component_registers, 0, &cregs->io);
--- a/hw/cxl/cxl-device-utils.c
+++ b/hw/cxl/cxl-device-utils.c
@@ -229,12 +229,9 @@ static void mailbox_reg_write(void *opaque, hwaddr offset, uint64_t value,

 static uint64_t mdev_reg_read(void *opaque, hwaddr offset, unsigned size)
 {
-    uint64_t retval = 0;
+    CXLDeviceState *cxl_dstate = opaque;

-    retval = FIELD_DP64(retval, CXL_MEM_DEV_STS, MEDIA_STATUS, 1);
-    retval = FIELD_DP64(retval, CXL_MEM_DEV_STS, MBOX_READY, 1);
-
-    return retval;
+    return cxl_dstate->memdev_status;
 }

 static void ro_reg_write(void *opaque, hwaddr offset, uint64_t value,
@@ -371,7 +368,15 @@ static void mailbox_reg_init_common(CXLDeviceState *cxl_dstate)
    cxl_dstate->mbox_msi_n = msi_n;
 }

-static void memdev_reg_init_common(CXLDeviceState *cxl_dstate) { }
+static void memdev_reg_init_common(CXLDeviceState *cxl_dstate)
+{
+    uint64_t memdev_status_reg;
+
+    memdev_status_reg = FIELD_DP64(0, CXL_MEM_DEV_STS, MEDIA_STATUS, 1);
+    memdev_status_reg = FIELD_DP64(memdev_status_reg, CXL_MEM_DEV_STS,
+                                   MBOX_READY, 1);
+    cxl_dstate->memdev_status = memdev_status_reg;
+}

 void cxl_device_register_init_t3(CXLType3Dev *ct3d)
 {
--- a/hw/display/virtio-gpu-virgl.c
+++ b/hw/display/virtio-gpu-virgl.c
@@ -181,7 +181,7 @@ static void virgl_cmd_set_scanout(VirtIOGPU *g,
        memset(&info, 0, sizeof(info));
        ret = virgl_renderer_resource_get_info(ss.resource_id, &info);
 #endif
-        if (ret == -1) {
+        if (ret) {
            qemu_log_mask(LOG_GUEST_ERROR,
                          "%s: illegal resource specified %d\n",
                          __func__, ss.resource_id);
--- a/hw/hppa/Kconfig
+++ b/hw/hppa/Kconfig
@@ -7,6 +7,7 @@ config HPPA_B160L
    select DINO
    select LASI
    select SERIAL
+    select SERIAL_PCI
    select ISA_BUS
    select I8259
    select IDE_CMD646
@@ -16,3 +17,4 @@ config HPPA_B160L
    select LASIPS2
    select PARALLEL
    select ARTIST
+    select USB_OHCI_PCI
--- a/hw/i386/Kconfig
+++ b/hw/i386/Kconfig
@@ -10,6 +10,11 @@ config SGX
    bool
    depends on KVM

+config TDX
+    bool
+    select X86_FW_OVMF
+    depends on KVM
+
 config PC
    bool
    imply APPLESMC
@@ -26,6 +31,7 @@ config PC
    imply QXL
    imply SEV
    imply SGX
+    imply TDX
    imply TEST_DEVICES
    imply TPM_CRB
    imply TPM_TIS_ISA
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -975,7 +975,8 @@ static void build_dbg_aml(Aml *table)
    aml_append(table, scope);
 }

-static Aml *build_link_dev(const char *name, uint8_t uid, Aml *reg)
+static Aml *build_link_dev(const char *name, uint8_t uid, Aml *reg,
+                           bool level_trigger_unsupported)
 {
    Aml *dev;
    Aml *crs;
@@ -987,7 +988,10 @@ static Aml *build_link_dev(const char *name, uint8_t uid, Aml *reg)
    aml_append(dev, aml_name_decl("_UID", aml_int(uid)));

    crs = aml_resource_template();
-    aml_append(crs, aml_interrupt(AML_CONSUMER, AML_LEVEL, AML_ACTIVE_HIGH,
+    aml_append(crs, aml_interrupt(AML_CONSUMER,
+                                  level_trigger_unsupported ?
+                                  AML_EDGE : AML_LEVEL,
+                                  AML_ACTIVE_HIGH,
                                  AML_SHARED, irqs, ARRAY_SIZE(irqs)));
    aml_append(dev, aml_name_decl("_PRS", crs));

@@ -1011,7 +1015,8 @@ static Aml *build_link_dev(const char *name, uint8_t uid, Aml *reg)
    return dev;
 }

-static Aml *build_gsi_link_dev(const char *name, uint8_t uid, uint8_t gsi)
+static Aml *build_gsi_link_dev(const char *name, uint8_t uid,
+                               uint8_t gsi, bool level_trigger_unsupported)
 {
    Aml *dev;
    Aml *crs;
@@ -1024,7 +1029,10 @@ static Aml *build_gsi_link_dev(const char *name, uint8_t uid, uint8_t gsi)

    crs = aml_resource_template();
    irqs = gsi;
-    aml_append(crs, aml_interrupt(AML_CONSUMER, AML_LEVEL, AML_ACTIVE_HIGH,
+    aml_append(crs, aml_interrupt(AML_CONSUMER,
+                                  level_trigger_unsupported ?
+                                  AML_EDGE : AML_LEVEL,
+                                  AML_ACTIVE_HIGH,
                                  AML_SHARED, &irqs, 1));
    aml_append(dev, aml_name_decl("_PRS", crs));

@@ -1043,7 +1051,7 @@ static Aml *build_gsi_link_dev(const char *name, uint8_t uid, uint8_t gsi)
 }

 /* _CRS method - get current settings */
-static Aml *build_iqcr_method(bool is_piix4)
+static Aml *build_iqcr_method(bool is_piix4, bool level_trigger_unsupported)
 {
    Aml *if_ctx;
    uint32_t irqs;
@@ -1051,7 +1059,9 @@ static Aml *build_iqcr_method(bool is_piix4)
    Aml *crs = aml_resource_template();

    irqs = 0;
-    aml_append(crs, aml_interrupt(AML_CONSUMER, AML_LEVEL,
+    aml_append(crs, aml_interrupt(AML_CONSUMER,
+                                  level_trigger_unsupported ?
+                                  AML_EDGE : AML_LEVEL,
                                  AML_ACTIVE_HIGH, AML_SHARED, &irqs, 1));
    aml_append(method, aml_name_decl("PRR0", crs));

@@ -1085,7 +1095,7 @@ static Aml *build_irq_status_method(void)
    return method;
 }

-static void build_piix4_pci0_int(Aml *table)
+static void build_piix4_pci0_int(Aml *table, bool level_trigger_unsupported)
 {
    Aml *dev;
    Aml *crs;
@@ -1098,12 +1108,16 @@ static void build_piix4_pci0_int(Aml *table)
    aml_append(sb_scope, pci0_scope);

    aml_append(sb_scope, build_irq_status_method());
-    aml_append(sb_scope, build_iqcr_method(true));
+    aml_append(sb_scope, build_iqcr_method(true, level_trigger_unsupported));

-    aml_append(sb_scope, build_link_dev("LNKA", 0, aml_name("PRQ0")));
-    aml_append(sb_scope, build_link_dev("LNKB", 1, aml_name("PRQ1")));
-    aml_append(sb_scope, build_link_dev("LNKC", 2, aml_name("PRQ2")));
-    aml_append(sb_scope, build_link_dev("LNKD", 3, aml_name("PRQ3")));
+    aml_append(sb_scope, build_link_dev("LNKA", 0, aml_name("PRQ0"),
+                                        level_trigger_unsupported));
+    aml_append(sb_scope, build_link_dev("LNKB", 1, aml_name("PRQ1"),
+                                        level_trigger_unsupported));
+    aml_append(sb_scope, build_link_dev("LNKC", 2, aml_name("PRQ2"),
+                                        level_trigger_unsupported));
+    aml_append(sb_scope, build_link_dev("LNKD", 3, aml_name("PRQ3"),
+                                        level_trigger_unsupported));

    dev = aml_device("LNKS");
    {
@@ -1112,7 +1126,9 @@ static void build_piix4_pci0_int(Aml *table)

        crs = aml_resource_template();
        irqs = 9;
-        aml_append(crs, aml_interrupt(AML_CONSUMER, AML_LEVEL,
+        aml_append(crs, aml_interrupt(AML_CONSUMER,
+                                      level_trigger_unsupported ?
+                                      AML_EDGE : AML_LEVEL,
                                      AML_ACTIVE_HIGH, AML_SHARED,
                                      &irqs, 1));
        aml_append(dev, aml_name_decl("_PRS", crs));
@@ -1198,7 +1214,7 @@ static Aml *build_q35_routing_table(const char *str)
    return pkg;
 }

-static void build_q35_pci0_int(Aml *table)
+static void build_q35_pci0_int(Aml *table, bool level_trigger_unsupported)
 {
    Aml *method;
    Aml *sb_scope = aml_scope("_SB");
@@ -1237,25 +1253,41 @@ static void build_q35_pci0_int(Aml *table)
    aml_append(sb_scope, pci0_scope);

    aml_append(sb_scope, build_irq_status_method());
-    aml_append(sb_scope, build_iqcr_method(false));
+    aml_append(sb_scope, build_iqcr_method(false, level_trigger_unsupported));

-    aml_append(sb_scope, build_link_dev("LNKA", 0, aml_name("PRQA")));
-    aml_append(sb_scope, build_link_dev("LNKB", 1, aml_name("PRQB")));
-    aml_append(sb_scope, build_link_dev("LNKC", 2, aml_name("PRQC")));
-    aml_append(sb_scope, build_link_dev("LNKD", 3, aml_name("PRQD")));
-    aml_append(sb_scope, build_link_dev("LNKE", 4, aml_name("PRQE")));
-    aml_append(sb_scope, build_link_dev("LNKF", 5, aml_name("PRQF")));
-    aml_append(sb_scope, build_link_dev("LNKG", 6, aml_name("PRQG")));
-    aml_append(sb_scope, build_link_dev("LNKH", 7, aml_name("PRQH")));
+    aml_append(sb_scope, build_link_dev("LNKA", 0, aml_name("PRQA"),
+                                        level_trigger_unsupported));
+    aml_append(sb_scope, build_link_dev("LNKB", 1, aml_name("PRQB"),
+                                        level_trigger_unsupported));
+    aml_append(sb_scope, build_link_dev("LNKC", 2, aml_name("PRQC"),
+                                        level_trigger_unsupported));
+    aml_append(sb_scope, build_link_dev("LNKD", 3, aml_name("PRQD"),
+                                        level_trigger_unsupported));
+    aml_append(sb_scope, build_link_dev("LNKE", 4, aml_name("PRQE"),
+                                        level_trigger_unsupported));
+    aml_append(sb_scope, build_link_dev("LNKF", 5, aml_name("PRQF"),
+                                        level_trigger_unsupported));
+    aml_append(sb_scope, build_link_dev("LNKG", 6, aml_name("PRQG"),
+                                        level_trigger_unsupported));
+    aml_append(sb_scope, build_link_dev("LNKH", 7, aml_name("PRQH"),
+                                        level_trigger_unsupported));

-    aml_append(sb_scope, build_gsi_link_dev("GSIA", 0x10, 0x10));
-    aml_append(sb_scope, build_gsi_link_dev("GSIB", 0x11, 0x11));
-    aml_append(sb_scope, build_gsi_link_dev("GSIC", 0x12, 0x12));
-    aml_append(sb_scope, build_gsi_link_dev("GSID", 0x13, 0x13));
-    aml_append(sb_scope, build_gsi_link_dev("GSIE", 0x14, 0x14));
-    aml_append(sb_scope, build_gsi_link_dev("GSIF", 0x15, 0x15));
-    aml_append(sb_scope, build_gsi_link_dev("GSIG", 0x16, 0x16));
-    aml_append(sb_scope, build_gsi_link_dev("GSIH", 0x17, 0x17));
+    aml_append(sb_scope, build_gsi_link_dev("GSIA", 0x10, 0x10,
+                                            level_trigger_unsupported));
+    aml_append(sb_scope, build_gsi_link_dev("GSIB", 0x11, 0x11,
+                                            level_trigger_unsupported));
+    aml_append(sb_scope, build_gsi_link_dev("GSIC", 0x12, 0x12,
+                                            level_trigger_unsupported));
+    aml_append(sb_scope, build_gsi_link_dev("GSID", 0x13, 0x13,
+                                            level_trigger_unsupported));
+    aml_append(sb_scope, build_gsi_link_dev("GSIE", 0x14, 0x14,
+                                            level_trigger_unsupported));
+    aml_append(sb_scope, build_gsi_link_dev("GSIF", 0x15, 0x15,
+                                            level_trigger_unsupported));
+    aml_append(sb_scope, build_gsi_link_dev("GSIG", 0x16, 0x16,
+                                            level_trigger_unsupported));
+    aml_append(sb_scope, build_gsi_link_dev("GSIH", 0x17, 0x17,
+                                            level_trigger_unsupported));

    aml_append(table, sb_scope);
 }
@@ -1415,7 +1447,7 @@ static void build_acpi0017(Aml *table)
    aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0017")));

    method = aml_method("_STA", 0, AML_NOTSERIALIZED);
-    aml_append(method, aml_return(aml_int(0x01)));
+    aml_append(method, aml_return(aml_int(0x0B)));
    aml_append(dev, method);
    build_cxl_dsm_method(dev);

@@ -1436,6 +1468,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
    PCMachineState *pcms = PC_MACHINE(machine);
    PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(machine);
    X86MachineState *x86ms = X86_MACHINE(machine);
+    bool level_trigger_unsupported = x86ms->eoi_intercept_unsupported;
    AcpiMcfgInfo mcfg;
    bool mcfg_valid = !!acpi_get_mcfg(&mcfg);
    uint32_t nr_mem = machine->ram_slots;
@@ -1468,7 +1501,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
        if (pm->pcihp_bridge_en || pm->pcihp_root_en) {
            build_x86_acpi_pci_hotplug(dsdt, pm->pcihp_io_base);
        }
-        build_piix4_pci0_int(dsdt);
+        build_piix4_pci0_int(dsdt, level_trigger_unsupported);
    } else if (q35) {
        sb_scope = aml_scope("_SB");
        dev = aml_device("PCI0");
@@ -1512,7 +1545,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
        if (pm->pcihp_bridge_en) {
            build_x86_acpi_pci_hotplug(dsdt, pm->pcihp_io_base);
        }
-        build_q35_pci0_int(dsdt);
+        build_q35_pci0_int(dsdt, level_trigger_unsupported);
    }

    if (misc->has_hpet) {
--- a/hw/i386/acpi-common.c
+++ b/hw/i386/acpi-common.c
@@ -100,9 +100,11 @@ void acpi_build_madt(GArray *table_data, BIOSLinker *linker,
    int i;
    bool x2apic_mode = false;
    MachineClass *mc = MACHINE_GET_CLASS(x86ms);
+    X86MachineClass *x86mc = X86_MACHINE_GET_CLASS(x86ms);
    const CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(MACHINE(x86ms));
    AcpiTable table = { .sig = "APIC", .rev = 3, .oem_id = oem_id,
                        .oem_table_id = oem_table_id };
+    bool level_trigger_unsupported = x86ms->eoi_intercept_unsupported;

    acpi_table_begin(&table, table_data);
    /* Local APIC Address */
@@ -122,18 +124,42 @@ void acpi_build_madt(GArray *table_data, BIOSLinker *linker,
                     IO_APIC_SECONDARY_ADDRESS, IO_APIC_SECONDARY_IRQBASE);
    }

-    if (x86ms->apic_xrupt_override) {
-        build_xrupt_override(table_data, 0, 2,
-            0 /* Flags: Conforms to the specifications of the bus */);
-    }
-
-    for (i = 1; i < 16; i++) {
-        if (!(x86ms->pci_irq_mask & (1 << i))) {
-            /* No need for a INT source override structure. */
-            continue;
+    if (level_trigger_unsupported) {
+        /* Force edge trigger */
+        if (x86mc->apic_xrupt_override) {
+            build_xrupt_override(table_data, 0, 2,
+                                 /* Flags: active high, edge triggered */
+                                 1 | (1 << 2));
+        }
+
+        for (i = x86mc->apic_xrupt_override ? 1 : 0; i < 16; i++) {
+            build_xrupt_override(table_data, i, i,
+                                 /* Flags: active high, edge triggered */
+                                 1 | (1 << 2));
+        }
+
+        if (x86ms->ioapic2) {
+            for (i = 0; i < 16; i++) {
+                build_xrupt_override(table_data, IO_APIC_SECONDARY_IRQBASE + i,
+                                     IO_APIC_SECONDARY_IRQBASE + i,
+                                     /* Flags: active high, edge triggered */
+                                     1 | (1 << 2));
+            }
+        }
+    } else {
+        if (x86mc->apic_xrupt_override) {
+            build_xrupt_override(table_data, 0, 2,
+                    0 /* Flags: Conforms to the specifications of the bus */);
+        }
+
+        for (i = 1; i < 16; i++) {
+            if (!(x86ms->pci_irq_mask & (1 << i))) {
+                /* No need for a INT source override structure. */
+                continue;
+            }
+            build_xrupt_override(table_data, i, i,
+                0xd /* Flags: Active high, Level Triggered */);
        }
-        build_xrupt_override(table_data, i, i,
-            0xd /* Flags: Active high, Level Triggered */);
    }

    if (x2apic_mode) {
--- a/hw/i386/fw_cfg.c
+++ b/hw/i386/fw_cfg.c
@@ -48,15 +48,25 @@ const char *fw_cfg_arch_key_name(uint16_t key)
    return NULL;
 }

-void fw_cfg_build_smbios(MachineState *ms, FWCfgState *fw_cfg)
+void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg)
 {
 #ifdef CONFIG_SMBIOS
    uint8_t *smbios_tables, *smbios_anchor;
    size_t smbios_tables_len, smbios_anchor_len;
    struct smbios_phys_mem_area *mem_array;
    unsigned i, array_count;
+    MachineState *ms = MACHINE(pcms);
+    PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
+    MachineClass *mc = MACHINE_GET_CLASS(pcms);
    X86CPU *cpu = X86_CPU(ms->possible_cpus->cpus[0].cpu);

+    if (pcmc->smbios_defaults) {
+        /* These values are guest ABI, do not change */
+        smbios_set_defaults("QEMU", mc->desc, mc->name,
+                            pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded,
+                            pcms->smbios_entry_point_type);
+    }
+
    /* tell smbios about cpuid version and features */
    smbios_set_cpuid(cpu->env.cpuid_version, cpu->env.features[FEAT_1_EDX]);

--- a/hw/i386/fw_cfg.h
+++ b/hw/i386/fw_cfg.h
@@ -10,6 +10,7 @@
 #define HW_I386_FW_CFG_H

 #include "hw/boards.h"
+#include "hw/i386/pc.h"
 #include "hw/nvram/fw_cfg.h"

 #define FW_CFG_IO_BASE     0x510
@@ -22,7 +23,7 @@
 FWCfgState *fw_cfg_arch_create(MachineState *ms,
                               uint16_t boot_cpus,
                               uint16_t apic_id_limit);
-void fw_cfg_build_smbios(MachineState *ms, FWCfgState *fw_cfg);
+void fw_cfg_build_smbios(PCMachineState *ms, FWCfgState *fw_cfg);
 void fw_cfg_build_feature_control(MachineState *ms, FWCfgState *fw_cfg);
 void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg);

--- a/hw/i386/meson.build
+++ b/hw/i386/meson.build
@@ -27,6 +27,7 @@ i386_ss.add(when: 'CONFIG_PC', if_true: files(
  'port92.c'))
 i386_ss.add(when: 'CONFIG_X86_FW_OVMF', if_true: files('pc_sysfw_ovmf.c'),
                                        if_false: files('pc_sysfw_ovmf-stubs.c'))
+i386_ss.add(when: 'CONFIG_TDX', if_true: files('tdvf.c', 'tdvf-hob.c'))

 subdir('kvm')
 subdir('xen')
--- a/hw/i386/microvm.c
+++ b/hw/i386/microvm.c
@@ -175,7 +175,7 @@ static void microvm_devices_init(MicrovmMachineState *mms)
                          &error_abort);
    isa_bus_register_input_irqs(isa_bus, x86ms->gsi);

-    ioapic_init_gsi(gsi_state, "machine");
+    ioapic_init_gsi(gsi_state, OBJECT(mms));
    if (ioapics > 1) {
        x86ms->ioapic2 = ioapic_init_secondary(gsi_state);
    }
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -43,6 +43,7 @@
 #include "sysemu/xen.h"
 #include "sysemu/reset.h"
 #include "kvm/kvm_i386.h"
+#include "kvm/tdx.h"
 #include "hw/xen/xen.h"
 #include "qapi/qmp/qlist.h"
 #include "qemu/error-report.h"
@@ -692,22 +693,13 @@ void pc_machine_done(Notifier *notifier, void *data)

    acpi_setup();
    if (x86ms->fw_cfg) {
-        fw_cfg_build_smbios(MACHINE(pcms), x86ms->fw_cfg);
+        fw_cfg_build_smbios(pcms, x86ms->fw_cfg);
        fw_cfg_build_feature_control(MACHINE(pcms), x86ms->fw_cfg);
        /* update FW_CFG_NB_CPUS to account for -device added CPUs */
        fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus);
    }
 }

-void pc_guest_info_init(PCMachineState *pcms)
-{
-    X86MachineState *x86ms = X86_MACHINE(pcms);
-
-    x86ms->apic_xrupt_override = true;
-    pcms->machine_done.notify = pc_machine_done;
-    qemu_add_machine_init_done_notifier(&pcms->machine_done);
-}
-
 /* setup pci memory address space mapping into system address space */
 void pc_pci_as_mapping_init(MemoryRegion *system_memory,
                            MemoryRegion *pci_address_space)
@@ -1036,16 +1028,18 @@ void pc_memory_init(PCMachineState *pcms,
    /* Initialize PC system firmware */
    pc_system_firmware_init(pcms, rom_memory);

-    option_rom_mr = g_malloc(sizeof(*option_rom_mr));
-    memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
-                           &error_fatal);
-    if (pcmc->pci_enabled) {
-        memory_region_set_readonly(option_rom_mr, true);
+    if (!is_tdx_vm()) {
+        option_rom_mr = g_malloc(sizeof(*option_rom_mr));
+        memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
+                            &error_fatal);
+        if (pcmc->pci_enabled) {
+            memory_region_set_readonly(option_rom_mr, true);
+        }
+        memory_region_add_subregion_overlap(rom_memory,
+                                            PC_ROM_MIN_VGA,
+                                            option_rom_mr,
+                                            1);
    }
-    memory_region_add_subregion_overlap(rom_memory,
-                                        PC_ROM_MIN_VGA,
-                                        option_rom_mr,
-                                        1);

    fw_cfg = fw_cfg_arch_create(machine,
                                x86ms->boot_cpus, x86ms->apic_id_limit);
@@ -1753,6 +1747,9 @@ static void pc_machine_initfn(Object *obj)
    object_property_add_alias(OBJECT(pcms), "pcspk-audiodev",
                              OBJECT(pcms->pcspk), "audiodev");
    cxl_machine_init(obj, &pcms->cxl_devices_state);
+
+    pcms->machine_done.notify = pc_machine_done;
+    qemu_add_machine_init_done_notifier(&pcms->machine_done);
 }

 int pc_machine_kvm_type(MachineState *machine, const char *kvm_type)
@@ -1806,6 +1803,7 @@ static bool pc_hotplug_allowed(MachineState *ms, DeviceState *dev, Error **errp)
 static void pc_machine_class_init(ObjectClass *oc, void *data)
 {
    MachineClass *mc = MACHINE_CLASS(oc);
+    X86MachineClass *x86mc = X86_MACHINE_CLASS(oc);
    PCMachineClass *pcmc = PC_MACHINE_CLASS(oc);
    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);

@@ -1825,6 +1823,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
    pcmc->pvh_enabled = true;
    pcmc->kvmclock_create_always = true;
    pcmc->resizable_acpi_blob = true;
+    x86mc->apic_xrupt_override = true;
    assert(!mc->get_hotplug_handler);
    mc->get_hotplug_handler = pc_get_hotplug_handler;
    mc->hotplug_allowed = pc_hotplug_allowed;
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -36,7 +36,6 @@
 #include "hw/rtc/mc146818rtc.h"
 #include "hw/southbridge/piix.h"
 #include "hw/display/ramfb.h"
-#include "hw/firmware/smbios.h"
 #include "hw/pci/pci.h"
 #include "hw/pci/pci_ids.h"
 #include "hw/usb.h"
@@ -112,6 +111,7 @@ static void pc_init1(MachineState *machine,
    X86MachineState *x86ms = X86_MACHINE(machine);
    MemoryRegion *system_memory = get_system_memory();
    MemoryRegion *system_io = get_system_io();
+    Object *phb = NULL;
    PCIBus *pci_bus = NULL;
    ISABus *isa_bus;
    Object *piix4_pm = NULL;
@@ -195,8 +195,6 @@ static void pc_init1(MachineState *machine,
    }

    if (pcmc->pci_enabled) {
-        Object *phb;
-
        pci_memory = g_new(MemoryRegion, 1);
        memory_region_init(pci_memory, NULL, "pci", UINT64_MAX);
        rom_memory = pci_memory;
@@ -230,17 +228,6 @@ static void pc_init1(MachineState *machine,
                                               &error_abort);
    }

-    pc_guest_info_init(pcms);
-
-    if (pcmc->smbios_defaults) {
-        MachineClass *mc = MACHINE_GET_CLASS(machine);
-        /* These values are guest ABI, do not change */
-        smbios_set_defaults("QEMU", mc->desc,
-                            mc->name, pcmc->smbios_legacy_mode,
-                            pcmc->smbios_uuid_encoded,
-                            pcms->smbios_entry_point_type);
-    }
-
    /* allocate ram and load rom/bios */
    if (!xen_enabled()) {
        pc_memory_init(pcms, system_memory, rom_memory, hole64_size);
@@ -323,8 +310,8 @@ static void pc_init1(MachineState *machine,
        pc_i8259_create(isa_bus, gsi_state->i8259_irq);
    }

-    if (pcmc->pci_enabled) {
-        ioapic_init_gsi(gsi_state, "i440fx");
+    if (phb) {
+        ioapic_init_gsi(gsi_state, phb);
    }

    if (tcg_enabled()) {
@@ -344,11 +331,8 @@ static void pc_init1(MachineState *machine,

    pc_nic_init(pcmc, isa_bus, pci_bus, pcms->xenbus);

-    if (pcmc->pci_enabled) {
-        pc_cmos_init(pcms, idebus[0], idebus[1], rtc_state);
-    }
 #ifdef CONFIG_IDE_ISA
-    else {
+    if (!pcmc->pci_enabled) {
        DriveInfo *hd[MAX_IDE_BUS * MAX_IDE_DEVS];
        int i;

@@ -366,10 +350,11 @@ static void pc_init1(MachineState *machine,
            busname[4] = '0' + i;
            idebus[i] = qdev_get_child_bus(DEVICE(dev), busname);
        }
-        pc_cmos_init(pcms, idebus[0], idebus[1], rtc_state);
    }
 #endif

+    pc_cmos_init(pcms, idebus[0], idebus[1], rtc_state);
+
    if (piix4_pm) {
        smi_irq = qemu_allocate_irq(pc_acpi_smi_interrupt, first_cpu, 0);

--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -45,7 +45,6 @@
 #include "hw/i386/amd_iommu.h"
 #include "hw/i386/intel_iommu.h"
 #include "hw/display/ramfb.h"
-#include "hw/firmware/smbios.h"
 #include "hw/ide/pci.h"
 #include "hw/ide/ahci.h"
 #include "hw/intc/ioapic.h"
@@ -130,8 +129,7 @@ static void pc_q35_init(MachineState *machine)
    ISADevice *rtc_state;
    MemoryRegion *system_memory = get_system_memory();
    MemoryRegion *system_io = get_system_io();
-    MemoryRegion *pci_memory;
-    MemoryRegion *rom_memory;
+    MemoryRegion *pci_memory = g_new(MemoryRegion, 1);
    GSIState *gsi_state;
    ISABus *isa_bus;
    int i;
@@ -143,6 +141,8 @@ static void pc_q35_init(MachineState *machine)
    bool keep_pci_slot_hpc;
    uint64_t pci_hole64_size = 0;

+    assert(pcmc->pci_enabled);
+
    /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory
     * and 256 Mbytes for PCI Express Enhanced Configuration Access Mapping
     * also known as MMCFG).
@@ -189,37 +189,16 @@ static void pc_q35_init(MachineState *machine)
        kvmclock_create(pcmc->kvmclock_create_always);
    }

-    /* pci enabled */
-    if (pcmc->pci_enabled) {
-        pci_memory = g_new(MemoryRegion, 1);
-        memory_region_init(pci_memory, NULL, "pci", UINT64_MAX);
-        rom_memory = pci_memory;
-    } else {
-        pci_memory = NULL;
-        rom_memory = system_memory;
-    }
-
-    pc_guest_info_init(pcms);
-
-    if (pcmc->smbios_defaults) {
-        /* These values are guest ABI, do not change */
-        smbios_set_defaults("QEMU", mc->desc,
-                            mc->name, pcmc->smbios_legacy_mode,
-                            pcmc->smbios_uuid_encoded,
-                            pcms->smbios_entry_point_type);
-    }
-
    /* create pci host bus */
    phb = OBJECT(qdev_new(TYPE_Q35_HOST_DEVICE));

-    if (pcmc->pci_enabled) {
-        pci_hole64_size = object_property_get_uint(phb,
-                                                   PCI_HOST_PROP_PCI_HOLE64_SIZE,
-                                                   &error_abort);
-    }
+    pci_hole64_size = object_property_get_uint(phb,
+                                               PCI_HOST_PROP_PCI_HOLE64_SIZE,
+                                               &error_abort);

    /* allocate ram and load rom/bios */
-    pc_memory_init(pcms, system_memory, rom_memory, pci_hole64_size);
+    memory_region_init(pci_memory, NULL, "pci", UINT64_MAX);
+    pc_memory_init(pcms, system_memory, pci_memory, pci_hole64_size);

    object_property_add_child(OBJECT(machine), "q35", phb);
    object_property_set_link(phb, PCI_HOST_PROP_RAM_MEM,
@@ -236,6 +215,8 @@ static void pc_q35_init(MachineState *machine)
                            x86ms->above_4g_mem_size, NULL);
    object_property_set_bool(phb, PCI_HOST_BYPASS_IOMMU,
                             pcms->default_bus_bypass_iommu, NULL);
+    object_property_set_bool(phb, PCI_HOST_PROP_SMM_RANGES,
+                             x86_machine_is_smm_enabled(x86ms), NULL);
    sysbus_realize_and_unref(SYS_BUS_DEVICE(phb), &error_fatal);

    /* pci */
@@ -243,14 +224,14 @@ static void pc_q35_init(MachineState *machine)
    pcms->bus = host_bus;

    /* irq lines */
-    gsi_state = pc_gsi_create(&x86ms->gsi, pcmc->pci_enabled);
+    gsi_state = pc_gsi_create(&x86ms->gsi, true);

    /* create ISA bus */
    lpc = pci_new_multifunction(PCI_DEVFN(ICH9_LPC_DEV, ICH9_LPC_FUNC),
                                TYPE_ICH9_LPC_DEVICE);
-    qdev_prop_set_bit(DEVICE(lpc), "smm-enabled",
-                      x86_machine_is_smm_enabled(x86ms));
    lpc_dev = DEVICE(lpc);
+    qdev_prop_set_bit(lpc_dev, "smm-enabled",
+                      x86_machine_is_smm_enabled(x86ms));
    for (i = 0; i < IOAPIC_NUM_PINS; i++) {
        qdev_connect_gpio_out_named(lpc_dev, ICH9_GPIO_GSI, i, x86ms->gsi[i]);
    }
@@ -286,9 +267,7 @@ static void pc_q35_init(MachineState *machine)
        pc_i8259_create(isa_bus, gsi_state->i8259_irq);
    }

-    if (pcmc->pci_enabled) {
-        ioapic_init_gsi(gsi_state, "q35");
-    }
+    ioapic_init_gsi(gsi_state, OBJECT(phb));

    if (tcg_enabled()) {
        x86_register_ferr_irq(x86ms->gsi[13]);
@@ -412,10 +391,6 @@ static void pc_q35_8_0_machine_options(MachineClass *m)
    pc_q35_8_1_machine_options(m);
    compat_props_add(m->compat_props, hw_compat_8_0, hw_compat_8_0_len);
    compat_props_add(m->compat_props, pc_compat_8_0, pc_compat_8_0_len);
-
-    /* For pc-q35-8.0 and older, use SMBIOS 2.8 by default */
-    pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32;
-    m->max_cpus = 288;
 }

 DEFINE_Q35_MACHINE(v8_0, "pc-q35-8.0", NULL,
@@ -448,6 +423,10 @@ static void pc_q35_7_0_machine_options(MachineClass *m)
    pcmc->enforce_amd_1tb_hole = false;
    compat_props_add(m->compat_props, hw_compat_7_0, hw_compat_7_0_len);
    compat_props_add(m->compat_props, pc_compat_7_0, pc_compat_7_0_len);
+
+    /* For pc-q35-7.0 and older, use SMBIOS 2.8 by default */
+    pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32;
+    m->max_cpus = 288;
 }

 DEFINE_Q35_MACHINE(v7_0, "pc-q35-7.0", NULL,
--- a/hw/i386/pc_sysfw.c
+++ b/hw/i386/pc_sysfw.c
@@ -37,6 +37,7 @@
 #include "hw/block/flash.h"
 #include "sysemu/kvm.h"
 #include "sev.h"
+#include "kvm/tdx.h"

 #define FLASH_SECTOR_SIZE 4096

@@ -107,17 +108,15 @@ void pc_system_flash_cleanup_unused(PCMachineState *pcms)
 {
    char *prop_name;
    int i;
-    Object *dev_obj;

    assert(PC_MACHINE_GET_CLASS(pcms)->pci_enabled);

    for (i = 0; i < ARRAY_SIZE(pcms->flash); i++) {
-        dev_obj = OBJECT(pcms->flash[i]);
-        if (!object_property_get_bool(dev_obj, "realized", &error_abort)) {
+        if (!qdev_is_realized(DEVICE(pcms->flash[i]))) {
            prop_name = g_strdup_printf("pflash%d", i);
            object_property_del(OBJECT(pcms), prop_name);
            g_free(prop_name);
-            object_unparent(dev_obj);
+            object_unparent(OBJECT(pcms->flash[i]));
            pcms->flash[i] = NULL;
        }
    }
@@ -265,5 +264,11 @@ void x86_firmware_configure(void *ptr, int size)
        }

        sev_encrypt_flash(ptr, size, &error_fatal);
+    } else if (is_tdx_vm()) {
+        ret = tdx_parse_tdvf(ptr, size);
+        if (ret) {
+            error_report("failed to parse TDVF for TDX VM");
+            exit(1);
+        }
    }
 }
--- a/hw/i386/tdvf-hob.c
+++ b/hw/i386/tdvf-hob.c
@@ -0,0 +1,147 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+
+ * Copyright (c) 2020 Intel Corporation
+ * Author: Isaku Yamahata <isaku.yamahata at gmail.com>
+ *                        <isaku.yamahata at intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qemu/error-report.h"
+#include "e820_memory_layout.h"
+#include "hw/i386/pc.h"
+#include "hw/i386/x86.h"
+#include "hw/pci/pcie_host.h"
+#include "sysemu/kvm.h"
+#include "standard-headers/uefi/uefi.h"
+#include "tdvf-hob.h"
+
+typedef struct TdvfHob {
+    hwaddr hob_addr;
+    void *ptr;
+    int size;
+
+    /* working area */
+    void *current;
+    void *end;
+} TdvfHob;
+
+static uint64_t tdvf_current_guest_addr(const TdvfHob *hob)
+{
+    return hob->hob_addr + (hob->current - hob->ptr);
+}
+
+static void tdvf_align(TdvfHob *hob, size_t align)
+{
+    hob->current = QEMU_ALIGN_PTR_UP(hob->current, align);
+}
+
+static void *tdvf_get_area(TdvfHob *hob, uint64_t size)
+{
+    void *ret;
+
+    if (hob->current + size > hob->end) {
+        error_report("TD_HOB overrun, size = 0x%" PRIx64, size);
+        exit(1);
+    }
+
+    ret = hob->current;
+    hob->current += size;
+    tdvf_align(hob, 8);
+    return ret;
+}
+
+static void tdvf_hob_add_memory_resources(TdxGuest *tdx, TdvfHob *hob)
+{
+    EFI_HOB_RESOURCE_DESCRIPTOR *region;
+    EFI_RESOURCE_ATTRIBUTE_TYPE attr;
+    EFI_RESOURCE_TYPE resource_type;
+
+    TdxRamEntry *e;
+    int i;
+
+    for (i = 0; i < tdx->nr_ram_entries; i++) {
+        e = &tdx->ram_entries[i];
+
+        if (e->type == TDX_RAM_UNACCEPTED) {
+            resource_type = EFI_RESOURCE_MEMORY_UNACCEPTED;
+            attr = EFI_RESOURCE_ATTRIBUTE_TDVF_UNACCEPTED;
+        } else if (e->type == TDX_RAM_ADDED){
+            resource_type = EFI_RESOURCE_SYSTEM_MEMORY;
+            attr = EFI_RESOURCE_ATTRIBUTE_TDVF_PRIVATE;
+        } else {
+            error_report("unknown TDX_RAM_ENTRY type %d", e->type);
+            exit(1);
+        }
+
+        region = tdvf_get_area(hob, sizeof(*region));
+        *region = (EFI_HOB_RESOURCE_DESCRIPTOR) {
+            .Header = {
+                .HobType = EFI_HOB_TYPE_RESOURCE_DESCRIPTOR,
+                .HobLength = cpu_to_le16(sizeof(*region)),
+                .Reserved = cpu_to_le32(0),
+            },
+            .Owner = EFI_HOB_OWNER_ZERO,
+            .ResourceType = cpu_to_le32(resource_type),
+            .ResourceAttribute = cpu_to_le32(attr),
+            .PhysicalStart = cpu_to_le64(e->address),
+            .ResourceLength = cpu_to_le64(e->length),
+        };
+    }
+}
+
+void tdvf_hob_create(TdxGuest *tdx, TdxFirmwareEntry *td_hob)
+{
+    TdvfHob hob = {
+        .hob_addr = td_hob->address,
+        .size = td_hob->size,
+        .ptr = td_hob->mem_ptr,
+
+        .current = td_hob->mem_ptr,
+        .end = td_hob->mem_ptr + td_hob->size,
+    };
+
+    EFI_HOB_GENERIC_HEADER *last_hob;
+    EFI_HOB_HANDOFF_INFO_TABLE *hit;
+
+    /* Note, Efi{Free}Memory{Bottom,Top} are ignored, leave 'em zeroed. */
+    hit = tdvf_get_area(&hob, sizeof(*hit));
+    *hit = (EFI_HOB_HANDOFF_INFO_TABLE) {
+        .Header = {
+            .HobType = EFI_HOB_TYPE_HANDOFF,
+            .HobLength = cpu_to_le16(sizeof(*hit)),
+            .Reserved = cpu_to_le32(0),
+        },
+        .Version = cpu_to_le32(EFI_HOB_HANDOFF_TABLE_VERSION),
+        .BootMode = cpu_to_le32(0),
+        .EfiMemoryTop = cpu_to_le64(0),
+        .EfiMemoryBottom = cpu_to_le64(0),
+        .EfiFreeMemoryTop = cpu_to_le64(0),
+        .EfiFreeMemoryBottom = cpu_to_le64(0),
+        .EfiEndOfHobList = cpu_to_le64(0), /* initialized later */
+    };
+
+    tdvf_hob_add_memory_resources(tdx, &hob);
+
+    last_hob = tdvf_get_area(&hob, sizeof(*last_hob));
+    *last_hob =  (EFI_HOB_GENERIC_HEADER) {
+        .HobType = EFI_HOB_TYPE_END_OF_HOB_LIST,
+        .HobLength = cpu_to_le16(sizeof(*last_hob)),
+        .Reserved = cpu_to_le32(0),
+    };
+    hit->EfiEndOfHobList = tdvf_current_guest_addr(&hob);
+}
--- a/hw/i386/tdvf-hob.h
+++ b/hw/i386/tdvf-hob.h
@@ -0,0 +1,24 @@
+#ifndef HW_I386_TD_HOB_H
+#define HW_I386_TD_HOB_H
+
+#include "hw/i386/tdvf.h"
+#include "target/i386/kvm/tdx.h"
+
+void tdvf_hob_create(TdxGuest *tdx, TdxFirmwareEntry *td_hob);
+
+#define EFI_RESOURCE_ATTRIBUTE_TDVF_PRIVATE     \
+    (EFI_RESOURCE_ATTRIBUTE_PRESENT |           \
+     EFI_RESOURCE_ATTRIBUTE_INITIALIZED |       \
+     EFI_RESOURCE_ATTRIBUTE_TESTED)
+
+#define EFI_RESOURCE_ATTRIBUTE_TDVF_UNACCEPTED  \
+    (EFI_RESOURCE_ATTRIBUTE_PRESENT |           \
+     EFI_RESOURCE_ATTRIBUTE_INITIALIZED |       \
+     EFI_RESOURCE_ATTRIBUTE_TESTED)
+
+#define EFI_RESOURCE_ATTRIBUTE_TDVF_MMIO        \
+    (EFI_RESOURCE_ATTRIBUTE_PRESENT     |       \
+     EFI_RESOURCE_ATTRIBUTE_INITIALIZED |       \
+     EFI_RESOURCE_ATTRIBUTE_UNCACHEABLE)
+
+#endif
--- a/hw/i386/tdvf.c
+++ b/hw/i386/tdvf.c
@@ -0,0 +1,200 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+
+ * Copyright (c) 2020 Intel Corporation
+ * Author: Isaku Yamahata <isaku.yamahata at gmail.com>
+ *                        <isaku.yamahata at intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+
+#include "hw/i386/pc.h"
+#include "hw/i386/tdvf.h"
+#include "sysemu/kvm.h"
+
+#define TDX_METADATA_OFFSET_GUID    "e47a6535-984a-4798-865e-4685a7bf8ec2"
+#define TDX_METADATA_VERSION        1
+#define TDVF_SIGNATURE              0x46564454 /* TDVF as little endian */
+
+typedef struct {
+    uint32_t DataOffset;
+    uint32_t RawDataSize;
+    uint64_t MemoryAddress;
+    uint64_t MemoryDataSize;
+    uint32_t Type;
+    uint32_t Attributes;
+} TdvfSectionEntry;
+
+typedef struct {
+    uint32_t Signature;
+    uint32_t Length;
+    uint32_t Version;
+    uint32_t NumberOfSectionEntries;
+    TdvfSectionEntry SectionEntries[];
+} TdvfMetadata;
+
+struct tdx_metadata_offset {
+    uint32_t offset;
+};
+
+static TdvfMetadata *tdvf_get_metadata(void *flash_ptr, int size)
+{
+    TdvfMetadata *metadata;
+    uint32_t offset = 0;
+    uint8_t *data;
+
+    if ((uint32_t) size != size) {
+        return NULL;
+    }
+
+    if (pc_system_ovmf_table_find(TDX_METADATA_OFFSET_GUID, &data, NULL)) {
+        offset = size - le32_to_cpu(((struct tdx_metadata_offset *)data)->offset);
+
+        if (offset + sizeof(*metadata) > size) {
+            return NULL;
+        }
+    } else {
+        error_report("Cannot find TDX_METADATA_OFFSET_GUID");
+        return NULL;
+    }
+
+    metadata = flash_ptr + offset;
+
+    /* Finally, verify the signature to determine if this is a TDVF image. */
+    metadata->Signature = le32_to_cpu(metadata->Signature);
+    if (metadata->Signature != TDVF_SIGNATURE) {
+        error_report("Invalid TDVF signature in metadata!");
+        return NULL;
+    }
+
+    /* Sanity check that the TDVF doesn't overlap its own metadata. */
+    metadata->Length = le32_to_cpu(metadata->Length);
+    if (offset + metadata->Length > size) {
+        return NULL;
+    }
+
+    /* Only version 1 is supported/defined. */
+    metadata->Version = le32_to_cpu(metadata->Version);
+    if (metadata->Version != TDX_METADATA_VERSION) {
+        return NULL;
+    }
+
+    return metadata;
+}
+
+static int tdvf_parse_and_check_section_entry(const TdvfSectionEntry *src,
+                                              TdxFirmwareEntry *entry)
+{
+    entry->data_offset = le32_to_cpu(src->DataOffset);
+    entry->data_len = le32_to_cpu(src->RawDataSize);
+    entry->address = le64_to_cpu(src->MemoryAddress);
+    entry->size = le64_to_cpu(src->MemoryDataSize);
+    entry->type = le32_to_cpu(src->Type);
+    entry->attributes = le32_to_cpu(src->Attributes);
+
+    /* sanity check */
+    if (entry->size < entry->data_len) {
+        error_report("Broken metadata RawDataSize 0x%x MemoryDataSize 0x%lx",
+                     entry->data_len, entry->size);
+        return -1;
+    }
+    if (!QEMU_IS_ALIGNED(entry->address, TARGET_PAGE_SIZE)) {
+        error_report("MemoryAddress 0x%lx not page aligned", entry->address);
+        return -1;
+    }
+    if (!QEMU_IS_ALIGNED(entry->size, TARGET_PAGE_SIZE)) {
+        error_report("MemoryDataSize 0x%lx not page aligned", entry->size);
+        return -1;
+    }
+
+    switch (entry->type) {
+    case TDVF_SECTION_TYPE_BFV:
+    case TDVF_SECTION_TYPE_CFV:
+        /* The sections that must be copied from firmware image to TD memory */
+        if (entry->data_len == 0) {
+            error_report("%d section with RawDataSize == 0", entry->type);
+            return -1;
+        }
+        break;
+    case TDVF_SECTION_TYPE_TD_HOB:
+    case TDVF_SECTION_TYPE_TEMP_MEM:
+        /* The sections that no need to be copied from firmware image */
+        if (entry->data_len != 0) {
+            error_report("%d section with RawDataSize 0x%x != 0",
+                         entry->type, entry->data_len);
+            return -1;
+        }
+        break;
+    default:
+        error_report("TDVF contains unsupported section type %d", entry->type);
+        return -1;
+    }
+
+    return 0;
+}
+
+int tdvf_parse_metadata(TdxFirmware *fw, void *flash_ptr, int size)
+{
+    TdvfSectionEntry *sections;
+    TdvfMetadata *metadata;
+    ssize_t entries_size;
+    uint32_t len, i;
+
+    metadata = tdvf_get_metadata(flash_ptr, size);
+    if (!metadata) {
+        return -EINVAL;
+    }
+
+    //load and parse metadata entries
+    fw->nr_entries = le32_to_cpu(metadata->NumberOfSectionEntries);
+    if (fw->nr_entries < 2) {
+        error_report("Invalid number of fw entries (%u) in TDVF", fw->nr_entries);
+        return -EINVAL;
+    }
+
+    len = le32_to_cpu(metadata->Length);
+    entries_size = fw->nr_entries * sizeof(TdvfSectionEntry);
+    if (len != sizeof(*metadata) + entries_size) {
+        error_report("TDVF metadata len (0x%x) mismatch, expected (0x%x)",
+                     len, (uint32_t)(sizeof(*metadata) + entries_size));
+        return -EINVAL;
+    }
+
+    fw->entries = g_new(TdxFirmwareEntry, fw->nr_entries);
+    sections = g_new(TdvfSectionEntry, fw->nr_entries);
+
+    if (!memcpy(sections, (void *)metadata + sizeof(*metadata), entries_size))  {
+        error_report("Failed to read TDVF section entries");
+        goto err;
+    }
+
+    for (i = 0; i < fw->nr_entries; i++) {
+        if (tdvf_parse_and_check_section_entry(&sections[i], &fw->entries[i])) {
+            goto err;
+        }
+    }
+    g_free(sections);
+
+    fw->mem_ptr = flash_ptr;
+    return 0;
+
+err:
+    g_free(sections);
+    fw->entries = 0;
+    g_free(fw->entries);
+    return -EINVAL;
+}
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -47,6 +47,7 @@
 #include "hw/intc/i8259.h"
 #include "hw/rtc/mc146818rtc.h"
 #include "target/i386/sev.h"
+#include "kvm/tdx.h"

 #include "hw/acpi/cpu_hotplug.h"
 #include "hw/irq.h"
@@ -636,20 +637,19 @@ void gsi_handler(void *opaque, int n, int level)
    }
 }

-void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name)
+void ioapic_init_gsi(GSIState *gsi_state, Object *parent)
 {
    DeviceState *dev;
    SysBusDevice *d;
    unsigned int i;

-    assert(parent_name);
+    assert(parent);
    if (kvm_ioapic_in_kernel()) {
        dev = qdev_new(TYPE_KVM_IOAPIC);
    } else {
        dev = qdev_new(TYPE_IOAPIC);
    }
-    object_property_add_child(object_resolve_path(parent_name, NULL),
-                              "ioapic", OBJECT(dev));
+    object_property_add_child(parent, "ioapic", OBJECT(dev));
    d = SYS_BUS_DEVICE(dev);
    sysbus_realize_and_unref(d, &error_fatal);
    sysbus_mmio_map(d, 0, IO_APIC_DEFAULT_ADDRESS);
@@ -1154,9 +1154,17 @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware,
        (bios_size % 65536) != 0) {
        goto bios_error;
    }
+
    bios = g_malloc(sizeof(*bios));
-    memory_region_init_ram(bios, NULL, "pc.bios", bios_size, &error_fatal);
-    if (sev_enabled()) {
+    if (is_tdx_vm()) {
+        memory_region_init_ram_guest_memfd(bios, NULL, "pc.bios", bios_size,
+                                           &error_fatal);
+        tdx_set_tdvf_region(bios);
+    } else {
+        memory_region_init_ram(bios, NULL, "pc.bios", bios_size, &error_fatal);
+    }
+
+    if (sev_enabled() || is_tdx_vm()) {
        /*
         * The concept of a "reset" simply doesn't exist for
         * confidential computing guests, we have to destroy and
@@ -1178,17 +1186,20 @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware,
    }
    g_free(filename);

-    /* map the last 128KB of the BIOS in ISA space */
-    isa_bios_size = MIN(bios_size, 128 * KiB);
-    isa_bios = g_malloc(sizeof(*isa_bios));
-    memory_region_init_alias(isa_bios, NULL, "isa-bios", bios,
-                             bios_size - isa_bios_size, isa_bios_size);
-    memory_region_add_subregion_overlap(rom_memory,
-                                        0x100000 - isa_bios_size,
-                                        isa_bios,
-                                        1);
-    if (!isapc_ram_fw) {
-        memory_region_set_readonly(isa_bios, true);
+    /* For TDX, alias different GPAs to same private memory is not supported */
+    if (!is_tdx_vm()) {
+        /* map the last 128KB of the BIOS in ISA space */
+        isa_bios_size = MIN(bios_size, 128 * KiB);
+        isa_bios = g_malloc(sizeof(*isa_bios));
+        memory_region_init_alias(isa_bios, NULL, "isa-bios", bios,
+                                bios_size - isa_bios_size, isa_bios_size);
+        memory_region_add_subregion_overlap(rom_memory,
+                                            0x100000 - isa_bios_size,
+                                            isa_bios,
+                                            1);
+        if (!isapc_ram_fw) {
+            memory_region_set_readonly(isa_bios, true);
+        }
    }

    /* map all the bios at the top of memory */
@@ -1386,6 +1397,17 @@ static void machine_set_sgx_epc(Object *obj, Visitor *v, const char *name,
    qapi_free_SgxEPCList(list);
 }

+static int x86_kvm_type(MachineState *ms, const char *vm_type)
+{
+    X86MachineState *x86ms = X86_MACHINE(ms);
+    int kvm_type;
+
+    kvm_type = kvm_enabled() ? kvm_get_vm_type(ms, vm_type) : 0;
+    x86ms->vm_type = kvm_type;
+
+    return kvm_type;
+}
+
 static void x86_machine_initfn(Object *obj)
 {
    X86MachineState *x86ms = X86_MACHINE(obj);
@@ -1399,6 +1421,7 @@ static void x86_machine_initfn(Object *obj)
    x86ms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8);
    x86ms->bus_lock_ratelimit = 0;
    x86ms->above_4g_mem_start = 4 * GiB;
+    x86ms->eoi_intercept_unsupported = false;
 }

 static void x86_machine_class_init(ObjectClass *oc, void *data)
@@ -1410,6 +1433,7 @@ static void x86_machine_class_init(ObjectClass *oc, void *data)
    mc->cpu_index_to_instance_props = x86_cpu_index_to_props;
    mc->get_default_cpu_node_id = x86_get_default_cpu_node_id;
    mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids;
+    mc->kvm_type = x86_kvm_type;
    x86mc->save_tsc_khz = true;
    x86mc->fwcfg_dma_enabled = true;
    nc->nmi_monitor_handler = x86_nmi;
--- a/hw/intc/Kconfig
+++ b/hw/intc/Kconfig
@@ -12,10 +12,6 @@ config IOAPIC
    bool
    select I8259

-config ARM_GIC
-    bool
-    select MSI_NONBROKEN
-
 config OPENPIC
    bool
    select MSI_NONBROKEN
@@ -25,14 +21,18 @@ config APIC
    select MSI_NONBROKEN
    select I8259

+config ARM_GIC
+    bool
+    select ARM_GICV3_TCG if TCG
+    select ARM_GIC_KVM if KVM
+    select MSI_NONBROKEN
+
 config ARM_GICV3_TCG
    bool
-    default y
    depends on ARM_GIC && TCG

 config ARM_GIC_KVM
    bool
-    default y
    depends on ARM_GIC && KVM

 config XICS
--- a/hw/misc/ivshmem.c
+++ b/hw/misc/ivshmem.c
@@ -476,7 +476,6 @@ static void setup_interrupt(IVShmemState *s, int vector, Error **errp)

 static void process_msg_shmem(IVShmemState *s, int fd, Error **errp)
 {
-    Error *local_err = NULL;
    struct stat buf;
    size_t size;

@@ -496,10 +495,9 @@ static void process_msg_shmem(IVShmemState *s, int fd, Error **errp)
    size = buf.st_size;

    /* mmap the region and map into the BAR2 */
-    memory_region_init_ram_from_fd(&s->server_bar2, OBJECT(s), "ivshmem.bar2",
-                                   size, RAM_SHARED, fd, 0, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
+    if (!memory_region_init_ram_from_fd(&s->server_bar2, OBJECT(s),
+                                        "ivshmem.bar2", size, RAM_SHARED,
+                                        fd, 0, errp)) {
        return;
    }

--- a/hw/net/tulip.c
+++ b/hw/net/tulip.c
@@ -421,7 +421,7 @@ static uint16_t tulip_mdi_default[] = {
    /* MDI Registers 8 - 15 */
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    /* MDI Registers 16 - 31 */
-    0x0003, 0x0000, 0x0001, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+    0x0003, 0x0000, 0x0001, 0x0000, 0x3b40, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
 };

@@ -429,7 +429,7 @@ static uint16_t tulip_mdi_default[] = {
 static const uint16_t tulip_mdi_mask[] = {
    0x0000, 0xffff, 0xffff, 0xffff, 0xc01f, 0xffff, 0xffff, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
-    0x0fff, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+    0x0fff, 0x0000, 0xffff, 0xffff, 0x0000, 0xffff, 0xffff, 0xffff,
    0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
 };

--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -7924,7 +7924,7 @@ static void nvme_init_state(NvmeCtrl *n)
    n->aer_reqs = g_new0(NvmeRequest *, n->params.aerl + 1);
    QTAILQ_INIT(&n->aer_queue);

-    list->numcntl = cpu_to_le16(max_vfs);
+    list->numcntl = max_vfs;
    for (i = 0; i < max_vfs; i++) {
        sctrl = &list->sec[i];
        sctrl->pcid = cpu_to_le16(n->cntlid);
@@ -8466,36 +8466,26 @@ static void nvme_pci_reset(DeviceState *qdev)
    nvme_ctrl_reset(n, NVME_RESET_FUNCTION);
 }

-static void nvme_sriov_pre_write_ctrl(PCIDevice *dev, uint32_t address,
-                                      uint32_t val, int len)
+static void nvme_sriov_post_write_config(PCIDevice *dev, uint16_t old_num_vfs)
 {
    NvmeCtrl *n = NVME(dev);
    NvmeSecCtrlEntry *sctrl;
-    uint16_t sriov_cap = dev->exp.sriov_cap;
-    uint32_t off = address - sriov_cap;
-    int i, num_vfs;
+    int i;

-    if (!sriov_cap) {
-        return;
-    }
-
-    if (range_covers_byte(off, len, PCI_SRIOV_CTRL)) {
-        if (!(val & PCI_SRIOV_CTRL_VFE)) {
-            num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF);
-            for (i = 0; i < num_vfs; i++) {
-                sctrl = &n->sec_ctrl_list.sec[i];
-                nvme_virt_set_state(n, le16_to_cpu(sctrl->scid), false);
-            }
-        }
+    for (i = pcie_sriov_num_vfs(dev); i < old_num_vfs; i++) {
+        sctrl = &n->sec_ctrl_list.sec[i];
+        nvme_virt_set_state(n, le16_to_cpu(sctrl->scid), false);
    }
 }

 static void nvme_pci_write_config(PCIDevice *dev, uint32_t address,
                                  uint32_t val, int len)
 {
-    nvme_sriov_pre_write_ctrl(dev, address, val, len);
+    uint16_t old_num_vfs = pcie_sriov_num_vfs(dev);
+
    pci_default_write_config(dev, address, val, len);
    pcie_cap_flr_write_config(dev, address, val, len);
+    nvme_sriov_post_write_config(dev, old_num_vfs);
 }

 static const VMStateDescription nvme_vmstate = {
--- a/hw/nvram/nrf51_nvm.c
+++ b/hw/nvram/nrf51_nvm.c
@@ -336,12 +336,9 @@ static void nrf51_nvm_init(Object *obj)
 static void nrf51_nvm_realize(DeviceState *dev, Error **errp)
 {
    NRF51NVMState *s = NRF51_NVM(dev);
-    Error *err = NULL;

-    memory_region_init_rom_device(&s->flash, OBJECT(dev), &flash_ops, s,
-        "nrf51_soc.flash", s->flash_size, &err);
-    if (err) {
-        error_propagate(errp, err);
+    if (!memory_region_init_rom_device(&s->flash, OBJECT(dev), &flash_ops, s,
+                                       "nrf51_soc.flash", s->flash_size, errp)) {
        return;
    }

--- a/hw/pci-host/designware.c
+++ b/hw/pci-host/designware.c
@@ -340,6 +340,8 @@ static void designware_pcie_root_config_write(PCIDevice *d, uint32_t address,
        break;

    case DESIGNWARE_PCIE_ATU_VIEWPORT:
+        val &= DESIGNWARE_PCIE_ATU_REGION_INBOUND |
+                (DESIGNWARE_PCIE_NUM_VIEWPORTS - 1);
        root->atu_viewport = val;
        break;

--- a/hw/pci-host/q35.c
+++ b/hw/pci-host/q35.c
@@ -179,6 +179,8 @@ static Property q35_host_props[] = {
                     mch.below_4g_mem_size, 0),
    DEFINE_PROP_SIZE(PCI_HOST_ABOVE_4G_MEM_SIZE, Q35PCIHost,
                     mch.above_4g_mem_size, 0),
+    DEFINE_PROP_BOOL(PCI_HOST_PROP_SMM_RANGES, Q35PCIHost,
+                     mch.has_smm_ranges, true),
    DEFINE_PROP_BOOL("x-pci-hole64-fix", Q35PCIHost, pci_hole64_fix, true),
    DEFINE_PROP_END_OF_LIST(),
 };
@@ -214,6 +216,7 @@ static void q35_host_initfn(Object *obj)
    /* mch's object_initialize resets the default value, set it again */
    qdev_prop_set_uint64(DEVICE(s), PCI_HOST_PROP_PCI_HOLE64_SIZE,
                         Q35_PCI_HOST_HOLE64_SIZE_DEFAULT);
+
    object_property_add(obj, PCI_HOST_PROP_PCI_HOLE_START, "uint32",
                        q35_host_get_pci_hole_start,
                        NULL, NULL, NULL);
@@ -476,6 +479,10 @@ static void mch_write_config(PCIDevice *d,
        mch_update_pciexbar(mch);
    }

+    if (!mch->has_smm_ranges) {
+        return;
+    }
+
    if (ranges_overlap(address, len, MCH_HOST_BRIDGE_SMRAM,
                       MCH_HOST_BRIDGE_SMRAM_SIZE)) {
        mch_update_smram(mch);
@@ -494,10 +501,13 @@ static void mch_write_config(PCIDevice *d,
 static void mch_update(MCHPCIState *mch)
 {
    mch_update_pciexbar(mch);
+
    mch_update_pam(mch);
-    mch_update_smram(mch);
-    mch_update_ext_tseg_mbytes(mch);
-    mch_update_smbase_smram(mch);
+    if (mch->has_smm_ranges) {
+        mch_update_smram(mch);
+        mch_update_ext_tseg_mbytes(mch);
+        mch_update_smbase_smram(mch);
+    }

    /*
     * pci hole goes from end-of-low-ram to io-apic.
@@ -538,19 +548,21 @@ static void mch_reset(DeviceState *qdev)
    pci_set_quad(d->config + MCH_HOST_BRIDGE_PCIEXBAR,
                 MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT);

-    d->config[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_DEFAULT;
-    d->config[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_DEFAULT;
-    d->wmask[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_WMASK;
-    d->wmask[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_WMASK;
+    if (mch->has_smm_ranges) {
+        d->config[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_DEFAULT;
+        d->config[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_DEFAULT;
+        d->wmask[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_WMASK;
+        d->wmask[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_WMASK;

-    if (mch->ext_tseg_mbytes > 0) {
-        pci_set_word(d->config + MCH_HOST_BRIDGE_EXT_TSEG_MBYTES,
-                     MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY);
+        if (mch->ext_tseg_mbytes > 0) {
+            pci_set_word(d->config + MCH_HOST_BRIDGE_EXT_TSEG_MBYTES,
+                        MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY);
+        }
+
+        d->config[MCH_HOST_BRIDGE_F_SMBASE] = 0;
+        d->wmask[MCH_HOST_BRIDGE_F_SMBASE] = 0xff;
    }

-    d->config[MCH_HOST_BRIDGE_F_SMBASE] = 0;
-    d->wmask[MCH_HOST_BRIDGE_F_SMBASE] = 0xff;
-
    mch_update(mch);
 }

@@ -568,6 +580,20 @@ static void mch_realize(PCIDevice *d, Error **errp)
    /* setup pci memory mapping */
    pc_pci_as_mapping_init(mch->system_memory, mch->pci_address_space);

+    /* PAM */
+    init_pam(&mch->pam_regions[0], OBJECT(mch), mch->ram_memory,
+             mch->system_memory, mch->pci_address_space,
+             PAM_BIOS_BASE, PAM_BIOS_SIZE);
+    for (i = 0; i < ARRAY_SIZE(mch->pam_regions) - 1; ++i) {
+        init_pam(&mch->pam_regions[i + 1], OBJECT(mch), mch->ram_memory,
+                 mch->system_memory, mch->pci_address_space,
+                 PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE);
+    }
+
+    if (!mch->has_smm_ranges) {
+        return;
+    }
+
    /* if *disabled* show SMRAM to all CPUs */
    memory_region_init_alias(&mch->smram_region, OBJECT(mch), "smram-region",
                             mch->pci_address_space, MCH_HOST_BRIDGE_SMRAM_C_BASE,
@@ -634,15 +660,6 @@ static void mch_realize(PCIDevice *d, Error **errp)

    object_property_add_const_link(qdev_get_machine(), "smram",
                                   OBJECT(&mch->smram));
-
-    init_pam(&mch->pam_regions[0], OBJECT(mch), mch->ram_memory,
-             mch->system_memory, mch->pci_address_space,
-             PAM_BIOS_BASE, PAM_BIOS_SIZE);
-    for (i = 0; i < ARRAY_SIZE(mch->pam_regions) - 1; ++i) {
-        init_pam(&mch->pam_regions[i + 1], OBJECT(mch), mch->ram_memory,
-                 mch->system_memory, mch->pci_address_space,
-                 PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE);
-    }
 }

 uint64_t mch_mcfg_base(void)
--- a/hw/pci-host/raven.c
+++ b/hw/pci-host/raven.c
@@ -345,8 +345,10 @@ static void raven_realize(PCIDevice *d, Error **errp)
    d->config[PCI_LATENCY_TIMER] = 0x10;
    d->config[PCI_CAPABILITY_LIST] = 0x00;

-    memory_region_init_rom_nomigrate(&s->bios, OBJECT(s), "bios", BIOS_SIZE,
-                                     &error_fatal);
+    if (!memory_region_init_rom_nomigrate(&s->bios, OBJECT(s), "bios",
+                                          BIOS_SIZE, errp)) {
+        return;
+    }
    memory_region_add_subregion(get_system_memory(), (uint32_t)(-BIOS_SIZE),
                                &s->bios);
    if (s->bios_name) {
--- a/hw/ppc/pef.c
+++ b/hw/ppc/pef.c
@@ -15,7 +15,6 @@
 #include "sysemu/kvm.h"
 #include "migration/blocker.h"
 #include "exec/confidential-guest-support.h"
-#include "hw/ppc/pef.h"

 #define TYPE_PEF_GUEST "pef-guest"
 OBJECT_DECLARE_SIMPLE_TYPE(PefGuest, PEF_GUEST)
@@ -93,7 +92,7 @@ static int kvmppc_svm_off(Error **errp)
 #endif
 }

-int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
+static int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
 {
    if (!object_dynamic_cast(OBJECT(cgs), TYPE_PEF_GUEST)) {
        return 0;
@@ -107,7 +106,7 @@ int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
    return kvmppc_svm_init(cgs, errp);
 }

-int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error **errp)
+static int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error **errp)
 {
    if (!object_dynamic_cast(OBJECT(cgs), TYPE_PEF_GUEST)) {
        return 0;
@@ -131,6 +130,10 @@ OBJECT_DEFINE_TYPE_WITH_INTERFACES(PefGuest,

 static void pef_guest_class_init(ObjectClass *oc, void *data)
 {
+    ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc);
+
+    klass->kvm_init = pef_kvm_init;
+    klass->kvm_reset = pef_kvm_reset;
 }

 static void pef_guest_init(Object *obj)
--- a/hw/ppc/rs6000_mc.c
+++ b/hw/ppc/rs6000_mc.c
@@ -143,7 +143,6 @@ static void rs6000mc_realize(DeviceState *dev, Error **errp)
    RS6000MCState *s = RS6000MC(dev);
    int socket = 0;
    unsigned int ram_size = s->ram_size / MiB;
-    Error *local_err = NULL;

    while (socket < 6) {
        if (ram_size >= 64) {
@@ -165,10 +164,8 @@ static void rs6000mc_realize(DeviceState *dev, Error **errp)
        if (s->simm_size[socket]) {
            char name[] = "simm.?";
            name[5] = socket + '0';
-            memory_region_init_ram(&s->simm[socket], OBJECT(dev), name,
-                                   s->simm_size[socket] * MiB, &local_err);
-            if (local_err) {
-                error_propagate(errp, local_err);
+            if (!memory_region_init_ram(&s->simm[socket], OBJECT(dev), name,
+                                        s->simm_size[socket] * MiB, errp)) {
                return;
            }
            memory_region_add_subregion_overlap(get_system_memory(), 0,
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -74,6 +74,7 @@
 #include "hw/virtio/vhost-scsi-common.h"

 #include "exec/ram_addr.h"
+#include "exec/confidential-guest-support.h"
 #include "hw/usb.h"
 #include "qemu/config-file.h"
 #include "qemu/error-report.h"
@@ -86,7 +87,6 @@
 #include "hw/ppc/spapr_tpm_proxy.h"
 #include "hw/ppc/spapr_nvdimm.h"
 #include "hw/ppc/spapr_numa.h"
-#include "hw/ppc/pef.h"

 #include "monitor/monitor.h"

@@ -1687,7 +1687,9 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason)
        qemu_guest_getrandom_nofail(spapr->fdt_rng_seed, 32);
    }

-    pef_kvm_reset(machine->cgs, &error_fatal);
+    if (machine->cgs) {
+        confidential_guest_kvm_reset(machine->cgs, &error_fatal);
+    }
    spapr_caps_apply(spapr);

    first_ppc_cpu = POWERPC_CPU(first_cpu);
@@ -2810,7 +2812,9 @@ static void spapr_machine_init(MachineState *machine)
    /*
     * if Secure VM (PEF) support is configured, then initialize it
     */
-    pef_kvm_init(machine->cgs, &error_fatal);
+    if (machine->cgs) {
+        confidential_guest_kvm_init(machine->cgs, &error_fatal);
+    }

    msi_nonbroken = true;

@@ -4647,13 +4651,10 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
    mc->block_default_type = IF_SCSI;

    /*
-     * Setting max_cpus to INT32_MAX. Both KVM and TCG max_cpus values
-     * should be limited by the host capability instead of hardcoded.
-     * max_cpus for KVM guests will be checked in kvm_init(), and TCG
-     * guests are welcome to have as many CPUs as the host are capable
-     * of emulate.
+     * While KVM determines max cpus in kvm_init() using kvm_max_vcpus(),
+     * In TCG the limit is restricted by the range of CPU IPIs available.
     */
-    mc->max_cpus = INT32_MAX;
+    mc->max_cpus = SPAPR_IRQ_NR_IPIS;

    mc->no_parallel = 1;
    mc->default_boot_order = "";
--- a/hw/ppc/spapr_irq.c
+++ b/hw/ppc/spapr_irq.c
@@ -23,6 +23,8 @@

 #include "trace.h"

+QEMU_BUILD_BUG_ON(SPAPR_IRQ_NR_IPIS > SPAPR_XIRQ_BASE);
+
 static const TypeInfo spapr_intc_info = {
    .name = TYPE_SPAPR_INTC,
    .parent = TYPE_INTERFACE,
@@ -329,7 +331,7 @@ void spapr_irq_init(SpaprMachineState *spapr, Error **errp)
        int i;

        dev = qdev_new(TYPE_SPAPR_XIVE);
-        qdev_prop_set_uint32(dev, "nr-irqs", smc->nr_xirqs + SPAPR_XIRQ_BASE);
+        qdev_prop_set_uint32(dev, "nr-irqs", smc->nr_xirqs + SPAPR_IRQ_NR_IPIS);
        /*
         * 8 XIVE END structures per CPU. One for each available
         * priority
@@ -356,7 +358,7 @@ void spapr_irq_init(SpaprMachineState *spapr, Error **errp)
    }

    spapr->qirqs = qemu_allocate_irqs(spapr_set_irq, spapr,
-                                      smc->nr_xirqs + SPAPR_XIRQ_BASE);
+                                      smc->nr_xirqs + SPAPR_IRQ_NR_IPIS);

    /*
     * Mostly we don't actually need this until reset, except that not
--- a/hw/riscv/virt-acpi-build.c
+++ b/hw/riscv/virt-acpi-build.c
@@ -132,7 +132,7 @@ static void build_rhct(GArray *table_data,
    size_t len, aligned_len;
    uint32_t isa_offset, num_rhct_nodes;
    RISCVCPU *cpu;
-    char *isa;
+    g_autofree char *isa = NULL;

    AcpiTable table = { .sig = "RHCT", .rev = 1, .oem_id = s->oem_id,
                        .oem_table_id = s->oem_table_id };
--- a/hw/rtc/pl031.c
+++ b/hw/rtc/pl031.c
@@ -141,6 +141,7 @@ static void pl031_write(void * opaque, hwaddr offset,
        g_autofree const char *qom_path = object_get_canonical_path(opaque);
        struct tm tm;

+        s->lr = value;
        s->tick_offset += value - pl031_get_count(s);

        qemu_get_timedate(&tm, s->tick_offset);
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -14,6 +14,7 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "exec/ram_addr.h"
+#include "exec/confidential-guest-support.h"
 #include "hw/s390x/s390-virtio-hcall.h"
 #include "hw/s390x/sclp.h"
 #include "hw/s390x/s390_flic.h"
@@ -267,7 +268,9 @@ static void ccw_init(MachineState *machine)
    s390_init_cpus(machine);

    /* Need CPU model to be determined before we can set up PV */
-    s390_pv_init(machine->cgs, &error_fatal);
+    if (machine->cgs) {
+        confidential_guest_kvm_init(machine->cgs, &error_fatal);
+    }

    s390_flic_init();

--- a/hw/scsi/lsi53c895a.c
+++ b/hw/scsi/lsi53c895a.c
@@ -1159,6 +1159,7 @@ again:
        lsi_script_scsi_interrupt(s, LSI_SIST0_UDC, 0);
        lsi_disconnect(s);
        trace_lsi_execute_script_stop();
+        reentrancy_level--;
        return;
    }
    insn = read_dword(s, s->dsp);
--- a/hw/scsi/megasas.c
+++ b/hw/scsi/megasas.c
@@ -1928,7 +1928,7 @@ static void megasas_command_cancelled(SCSIRequest *req)
 {
    MegasasCmd *cmd = req->hba_private;

-    if (!cmd) {
+    if (!cmd || !cmd->frame) {
        return;
    }
    cmd->frame->header.cmd_status = MFI_STAT_SCSI_IO_FAILED;
--- a/hw/scsi/scsi-generic.c
+++ b/hw/scsi/scsi-generic.c
@@ -327,11 +327,17 @@ static void scsi_read_complete(void * opaque, int ret)
    if (r->req.cmd.buf[0] == READ_CAPACITY_10 &&
        (ldl_be_p(&r->buf[0]) != 0xffffffffU || s->max_lba == 0)) {
        s->blocksize = ldl_be_p(&r->buf[4]);
-        s->max_lba = ldl_be_p(&r->buf[0]) & 0xffffffffULL;
+        BlockBackend *blk = s->conf.blk;
+        BlockDriverState *bs = blk_bs(blk);
+        s->max_lba = bs->total_sectors - 1;
+        stl_be_p(&r->buf[0], s->max_lba);
    } else if (r->req.cmd.buf[0] == SERVICE_ACTION_IN_16 &&
               (r->req.cmd.buf[1] & 31) == SAI_READ_CAPACITY_16) {
        s->blocksize = ldl_be_p(&r->buf[8]);
-        s->max_lba = ldq_be_p(&r->buf[0]);
+        BlockBackend *blk = s->conf.blk;
+        BlockDriverState *bs = blk_bs(blk);
+        s->max_lba = bs->total_sectors - 1;
+        stq_be_p(&r->buf[0], s->max_lba);
    }

    /*
@@ -396,7 +402,10 @@ static void scsi_write_complete(void * opaque, int ret)
    assert(r->req.aiocb != NULL);
    r->req.aiocb = NULL;

-    if (ret || r->req.io_canceled) {
+    if (ret || r->req.io_canceled ||
+        r->io_header.status != SCSI_HOST_OK ||
+        (r->io_header.driver_status & SG_ERR_DRIVER_TIMEOUT) ||
+        r->io_header.status != GOOD) {
        scsi_command_complete_noio(r, ret);
        goto done;
    }
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -1149,6 +1149,7 @@ static void virtio_scsi_drained_begin(SCSIBus *bus)
 static void virtio_scsi_drained_end(SCSIBus *bus)
 {
    VirtIOSCSI *s = container_of(bus, VirtIOSCSI, bus);
+    VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s);
    VirtIODevice *vdev = VIRTIO_DEVICE(s);
    uint32_t total_queues = VIRTIO_SCSI_VQ_NUM_FIXED +
                            s->parent_obj.conf.num_queues;
@@ -1166,7 +1167,11 @@ static void virtio_scsi_drained_end(SCSIBus *bus)

    for (uint32_t i = 0; i < total_queues; i++) {
        VirtQueue *vq = virtio_get_queue(vdev, i);
-        virtio_queue_aio_attach_host_notifier(vq, s->ctx);
+        if (vq == vs->event_vq) {
+            virtio_queue_aio_attach_host_notifier_no_poll(vq, s->ctx);
+        } else {
+            virtio_queue_aio_attach_host_notifier(vq, s->ctx);
+        }
    }
 }

--- a/hw/smbios/smbios.c
+++ b/hw/smbios/smbios.c
@@ -346,6 +346,11 @@ static const QemuOptDesc qemu_smbios_type4_opts[] = {
 };

 static const QemuOptDesc qemu_smbios_type8_opts[] = {
+    {
+        .name = "type",
+        .type = QEMU_OPT_NUMBER,
+        .help = "SMBIOS element type",
+    },
    {
        .name = "internal_reference",
        .type = QEMU_OPT_STRING,
@@ -366,9 +371,15 @@ static const QemuOptDesc qemu_smbios_type8_opts[] = {
        .type = QEMU_OPT_NUMBER,
        .help = "port type",
    },
+    { /* end of list */ }
 };

 static const QemuOptDesc qemu_smbios_type11_opts[] = {
+    {
+        .name = "type",
+        .type = QEMU_OPT_NUMBER,
+        .help = "SMBIOS element type",
+    },
    {
        .name = "value",
        .type = QEMU_OPT_STRING,
@@ -379,6 +390,7 @@ static const QemuOptDesc qemu_smbios_type11_opts[] = {
        .type = QEMU_OPT_STRING,
        .help = "OEM string data from file",
    },
+    { /* end of list */ }
 };

 static const QemuOptDesc qemu_smbios_type17_opts[] = {
@@ -1251,6 +1263,7 @@ void smbios_entry_add(QemuOpts *opts, Error **errp)
        struct smbios_structure_header *header;
        int size;
        struct smbios_table *table; /* legacy mode only */
+        uint8_t *dbl_nulls, *orig_end;

        if (!qemu_opts_validate(opts, qemu_smbios_file_opts, errp)) {
            return;
@@ -1263,11 +1276,21 @@ void smbios_entry_add(QemuOpts *opts, Error **errp)
        }

        /*
-         * NOTE: standard double '\0' terminator expected, per smbios spec.
-         * (except in legacy mode, where the second '\0' is implicit and
-         *  will be inserted by the BIOS).
+         * NOTE: standard double '\0' terminator expected, per smbios spec,
+         * unless the data is formatted for legacy mode, which is used by
+         * pc-i440fx-2.0 and earlier machine types. Legacy mode structures
+         * without strings have no '\0' terminators, and those with strings
+         * also don't have an additional '\0' terminator at the end of the
+         * final string '\0' terminator. The BIOS will add the '\0' terminators
+         * to comply with the smbios spec.
+         * For greater compatibility, regardless of the machine type used,
+         * either format is accepted.
         */
-        smbios_tables = g_realloc(smbios_tables, smbios_tables_len + size);
+        smbios_tables = g_realloc(smbios_tables, smbios_tables_len + size + 2);
+        orig_end = smbios_tables + smbios_tables_len + size;
+        /* add extra null bytes to end in case of legacy file data */
+        *orig_end = '\0';
+        *(orig_end + 1) = '\0';
        header = (struct smbios_structure_header *)(smbios_tables +
                                                    smbios_tables_len);

@@ -1285,6 +1308,20 @@ void smbios_entry_add(QemuOpts *opts, Error **errp)
            }
            set_bit(header->type, have_binfile_bitmap);
        }
+        for (dbl_nulls = smbios_tables + smbios_tables_len + header->length;
+             dbl_nulls + 2 <= orig_end; dbl_nulls++) {
+            if (*dbl_nulls == '\0' && *(dbl_nulls + 1) == '\0') {
+                break;
+            }
+        }
+        if (dbl_nulls + 2  < orig_end) {
+            error_setg(errp, "SMBIOS file data malformed");
+            return;
+        }
+        /* increase size by how many extra nulls were actually needed */
+        size += dbl_nulls + 2 - orig_end;
+        smbios_tables = g_realloc(smbios_tables, smbios_tables_len + size);
+        set_bit(header->type, have_binfile_bitmap);

        if (header->type == 4) {
            smbios_type4_count++;
@@ -1304,6 +1341,17 @@ void smbios_entry_add(QemuOpts *opts, Error **errp)
         *       delete the one we don't need from smbios_set_defaults(),
         *       once we know which machine version has been requested.
         */
+        if (dbl_nulls + 2 == orig_end) {
+            /* chop off nulls to get legacy format */
+            if (header->length + 2 == size) {
+                size -= 2;
+            } else {
+                size -= 1;
+            }
+        } else {
+            /* undo conversion from legacy format to per-spec format */
+            size -= dbl_nulls + 2 - orig_end;
+        }
        if (!smbios_entries) {
            smbios_entries_len = sizeof(uint16_t);
            smbios_entries = g_malloc0(smbios_entries_len);
--- a/hw/sparc/sun4m.c
+++ b/hw/sparc/sun4m.c
@@ -577,12 +577,9 @@ static void idreg_realize(DeviceState *ds, Error **errp)
 {
    IDRegState *s = MACIO_ID_REGISTER(ds);
    SysBusDevice *dev = SYS_BUS_DEVICE(ds);
-    Error *local_err = NULL;

-    memory_region_init_ram_nomigrate(&s->mem, OBJECT(ds), "sun4m.idreg",
-                                     sizeof(idreg_data), &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
+    if (!memory_region_init_ram_nomigrate(&s->mem, OBJECT(ds), "sun4m.idreg",
+                                          sizeof(idreg_data), errp)) {
        return;
    }

@@ -631,12 +628,9 @@ static void afx_realize(DeviceState *ds, Error **errp)
 {
    AFXState *s = TCX_AFX(ds);
    SysBusDevice *dev = SYS_BUS_DEVICE(ds);
-    Error *local_err = NULL;

-    memory_region_init_ram_nomigrate(&s->mem, OBJECT(ds), "sun4m.afx", 4,
-                                     &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
+    if (!memory_region_init_ram_nomigrate(&s->mem, OBJECT(ds), "sun4m.afx",
+                                          4, errp)) {
        return;
    }

@@ -715,12 +709,9 @@ static void prom_realize(DeviceState *ds, Error **errp)
 {
    PROMState *s = OPENPROM(ds);
    SysBusDevice *dev = SYS_BUS_DEVICE(ds);
-    Error *local_err = NULL;

-    memory_region_init_ram_nomigrate(&s->prom, OBJECT(ds), "sun4m.prom",
-                                     PROM_SIZE_MAX, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
+    if (!memory_region_init_ram_nomigrate(&s->prom, OBJECT(ds), "sun4m.prom",
+                                          PROM_SIZE_MAX, errp)) {
        return;
    }

--- a/hw/sparc64/sun4u.c
+++ b/hw/sparc64/sun4u.c
@@ -454,12 +454,9 @@ static void prom_realize(DeviceState *ds, Error **errp)
 {
    PROMState *s = OPENPROM(ds);
    SysBusDevice *dev = SYS_BUS_DEVICE(ds);
-    Error *local_err = NULL;

-    memory_region_init_ram_nomigrate(&s->prom, OBJECT(ds), "sun4u.prom",
-                                     PROM_SIZE_MAX, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
+    if (!memory_region_init_ram_nomigrate(&s->prom, OBJECT(ds), "sun4u.prom",
+                                          PROM_SIZE_MAX, errp)) {
        return;
    }

--- a/hw/usb/bus.c
+++ b/hw/usb/bus.c
@@ -273,13 +273,14 @@ static void usb_qdev_realize(DeviceState *qdev, Error **errp)
    }

    if (dev->pcap_filename) {
-        int fd = qemu_open_old(dev->pcap_filename, O_CREAT | O_WRONLY | O_TRUNC, 0666);
+        int fd = qemu_open_old(dev->pcap_filename,
+                               O_CREAT | O_WRONLY | O_TRUNC | O_BINARY, 0666);
        if (fd < 0) {
            error_setg(errp, "open %s failed", dev->pcap_filename);
            usb_qdev_unrealize(qdev);
            return;
        }
-        dev->pcap = fdopen(fd, "w");
+        dev->pcap = fdopen(fd, "wb");
        usb_pcap_init(dev->pcap);
    }
 }
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -824,9 +824,11 @@ static void vfio_msix_disable(VFIOPCIDevice *vdev)
        }
    }

-    if (vdev->nr_vectors) {
-        vfio_disable_irqindex(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX);
-    }
+    /*
+     * Always clear MSI-X IRQ index. A PF device could have enabled
+     * MSI-X with no vectors. See vfio_msix_enable().
+     */
+    vfio_disable_irqindex(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX);

    vfio_msi_disable_common(vdev);
    vfio_intx_enable(vdev, &err);
--- a/hw/virtio/virtio-iommu.c
+++ b/hw/virtio/virtio-iommu.c
@@ -1264,6 +1264,8 @@ static void virtio_iommu_system_reset(void *opaque)

    trace_virtio_iommu_system_reset();

+    memset(s->iommu_pcibus_by_bus_num, 0, sizeof(s->iommu_pcibus_by_bus_num));
+
    /*
     * config.bypass is sticky across device reset, but should be restored on
     * system reset
@@ -1302,8 +1304,6 @@ static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)

    virtio_init(vdev, VIRTIO_ID_IOMMU, sizeof(struct virtio_iommu_config));

-    memset(s->iommu_pcibus_by_bus_num, 0, sizeof(s->iommu_pcibus_by_bus_num));
-
    s->req_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE,
                             virtio_iommu_handle_command);
    s->event_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, NULL);
--- a/hw/virtio/virtio-mem.c
+++ b/hw/virtio/virtio-mem.c
@@ -605,8 +605,7 @@ static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa,
        int fd = memory_region_get_fd(&vmem->memdev->mr);
        Error *local_err = NULL;

-        qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err);
-        if (local_err) {
+        if (!qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err)) {
            static bool warned;

            /*
@@ -1249,8 +1248,7 @@ static int virtio_mem_prealloc_range_cb(VirtIOMEM *vmem, void *arg,
    int fd = memory_region_get_fd(&vmem->memdev->mr);
    Error *local_err = NULL;

-    qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err);
-    if (local_err) {
+    if (!qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err)) {
        error_report_err(local_err);
        return -ENOMEM;
    }
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -3556,6 +3556,17 @@ static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)

 void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
 {
+    /*
+     * virtio_queue_aio_detach_host_notifier() can leave notifications disabled.
+     * Re-enable them.  (And if detach has not been used before, notifications
+     * being enabled is still the default state while a notifier is attached;
+     * see virtio_queue_host_notifier_aio_poll_end(), which will always leave
+     * notifications enabled once the polling section is left.)
+     */
+    if (!virtio_queue_get_notification(vq)) {
+        virtio_queue_set_notification(vq, 1);
+    }
+
    aio_set_event_notifier(ctx, &vq->host_notifier,
                           virtio_queue_host_notifier_read,
                           virtio_queue_host_notifier_aio_poll,
@@ -3563,6 +3574,13 @@ void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
    aio_set_event_notifier_poll(ctx, &vq->host_notifier,
                                virtio_queue_host_notifier_aio_poll_begin,
                                virtio_queue_host_notifier_aio_poll_end);
+
+    /*
+     * We will have ignored notifications about new requests from the guest
+     * while no notifiers were attached, so "kick" the virt queue to process
+     * those requests now.
+     */
+    event_notifier_set(&vq->host_notifier);
 }

 /*
@@ -3573,14 +3591,38 @@ void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
 */
 void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx)
 {
+    /* See virtio_queue_aio_attach_host_notifier() */
+    if (!virtio_queue_get_notification(vq)) {
+        virtio_queue_set_notification(vq, 1);
+    }
+
    aio_set_event_notifier(ctx, &vq->host_notifier,
                           virtio_queue_host_notifier_read,
                           NULL, NULL);
+
+    /*
+     * See virtio_queue_aio_attach_host_notifier().
+     * Note that this may be unnecessary for the type of virtqueues this
+     * function is used for.  Still, it will not hurt to have a quick look into
+     * whether we can/should process any of the virtqueue elements.
+     */
+    event_notifier_set(&vq->host_notifier);
 }

 void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx)
 {
    aio_set_event_notifier(ctx, &vq->host_notifier, NULL, NULL, NULL);
+
+    /*
+     * aio_set_event_notifier_poll() does not guarantee whether io_poll_end()
+     * will run after io_poll_begin(), so by removing the notifier, we do not
+     * know whether virtio_queue_host_notifier_aio_poll_end() has run after a
+     * previous virtio_queue_host_notifier_aio_poll_begin(), i.e. whether
+     * notifications are enabled or disabled.  It does not really matter anyway;
+     * we just removed the notifier, so we do not care about notifications until
+     * we potentially re-attach it.  The attach_host_notifier functions will
+     * ensure that notifications are enabled again when they are needed.
+     */
 }

 void virtio_queue_host_notifier_read(EventNotifier *n)
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -497,9 +497,14 @@ void aio_set_event_notifier(AioContext *ctx,
                            AioPollFn *io_poll,
                            EventNotifierHandler *io_poll_ready);

-/* Set polling begin/end callbacks for an event notifier that has already been
+/*
+ * Set polling begin/end callbacks for an event notifier that has already been
 * registered with aio_set_event_notifier.  Do nothing if the event notifier is
 * not registered.
+ *
+ * Note that if the io_poll_end() callback (or the entire notifier) is removed
+ * during polling, it will not be called, so an io_poll_begin() is not
+ * necessarily always followed by an io_poll_end().
 */
 void aio_set_event_notifier_poll(AioContext *ctx,
                                 EventNotifier *notifier,
--- a/include/block/block-hmp-cmds.h
+++ b/include/block/block-hmp-cmds.h
@@ -48,7 +48,7 @@ void hmp_eject(Monitor *mon, const QDict *qdict);

 void hmp_qemu_io(Monitor *mon, const QDict *qdict);

-void hmp_info_block(Monitor *mon, const QDict *qdict);
+void coroutine_fn hmp_info_block(Monitor *mon, const QDict *qdict);
 void hmp_info_blockstats(Monitor *mon, const QDict *qdict);
 void hmp_info_block_jobs(Monitor *mon, const QDict *qdict);
 void hmp_info_snapshots(Monitor *mon, const QDict *qdict);
--- a/include/block/qapi.h
+++ b/include/block/qapi.h
@@ -25,14 +25,14 @@
 #ifndef BLOCK_QAPI_H
 #define BLOCK_QAPI_H

+#include "block/block-common.h"
 #include "block/graph-lock.h"
 #include "block/snapshot.h"
 #include "qapi/qapi-types-block-core.h"

-BlockDeviceInfo * GRAPH_RDLOCK
-bdrv_block_device_info(BlockBackend *blk, BlockDriverState *bs,
-                       bool flat, Error **errp);
-
+BlockDeviceInfo *coroutine_fn GRAPH_RDLOCK
+bdrv_block_device_info(BlockBackend *blk, BlockDriverState *bs, bool flat,
+                       Error **errp);
 int GRAPH_RDLOCK
 bdrv_query_snapshot_info_list(BlockDriverState *bs,
                              SnapshotInfoList **p_list,
@@ -40,7 +40,10 @@ bdrv_query_snapshot_info_list(BlockDriverState *bs,
 void GRAPH_RDLOCK
 bdrv_query_image_info(BlockDriverState *bs, ImageInfo **p_info, bool flat,
                      bool skip_implicit_filters, Error **errp);
-void GRAPH_RDLOCK
+void coroutine_fn GRAPH_RDLOCK
+bdrv_co_query_block_graph_info(BlockDriverState *bs, BlockGraphInfo **p_info,
+                               Error **errp);
+void co_wrapper_bdrv_rdlock
 bdrv_query_block_graph_info(BlockDriverState *bs, BlockGraphInfo **p_info,
                            Error **errp);

--- a/Show More
+++ b/Show More
@@ -1 +1 @@
 .2.1
 .2.2