Compare commits


160 Commits

Author SHA1 Message Date
Chenyi Qiang
a3bad59cfe [HACK] i386/tdvf: Add the MMIO region info in HOB list
This is a workaround patch to configure an MMIO region larger than
4G.

Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
2023-12-13 01:31:44 +00:00
Chenyi Qiang
af2ef2175f hw/vfio: skip static IOMMU mapping if restricted memory region
The DMA mapping of a restricted memory region is manipulated dynamically
in the convert_mem_attr() memory listener. That is incompatible with
statically mapping/unmapping the whole range and pinning all pages, so
skip restricted memory regions when adding static IOMMU mappings in VFIO.

TODO: the current UPM implementation defaults the memory attribute to
shared in KVM. To match that, VFIO is supposed to DMA-map all pages. But
in a TD guest, pages are accepted as private and converted to shared
before DMA is done, so these default mappings during region_add() can be
avoided. The design may change in the future.

Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
2023-12-13 01:31:09 +00:00
Chenyi Qiang
27fff4a173 hw/vfio: implement convert_mem_attr() callback in vfio_memory_listener
VFIO normally registers the whole region with the kernel and pins all
pages, which is incompatible with restricted memory regions. A restricted
memory region exclusively represents one of two types of memory, i.e.
shared memory or private memory. Shared memory is accessible and can be
mapped in the IOMMU, while private memory is un-mapped and can't be
DMA-mapped in the IOMMU.

Register the VFIO convert_mem_attr() callback in the memory region
listener, so that the DMA mapping can be updated dynamically once the
memory attribute is converted.

Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
2023-12-13 01:31:09 +00:00
Chenyi Qiang
6422b6983c KVM: invoke the memory region listener when memory attribute converted
Once the hypervisor exits to userspace to convert a page from private
to shared or vice versa, notify the state change via the memory listener
so that other components (e.g. VFIO) can react.

Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
2023-12-13 01:31:09 +00:00
Chenyi Qiang
0d0632fe26 memory: introduce convert_mem_attr() callback in memory listener
UPM (Unmapping guest Private Memory) introduces a new kind of RAM memory
region which contains a restricted_fd in its corresponding RAMBlock. Such
a region has the concept of a memory attribute, i.e. private memory or
shared memory.

When the memory attribute is converted, some operations are necessary in
other components; e.g. in VFIO, the DMA mapping needs to be adjusted.
Introduce a new memory region listener for the attribute conversion so
that other components can register their own handlers (a standalone
sketch of the pattern follows this entry).

Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
2023-12-13 01:31:09 +00:00
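
A minimal standalone C sketch of the listener pattern this commit describes; the types and names here are illustrative stand-ins, not QEMU's actual MemoryListener API:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef struct MemAttrListener MemAttrListener;
    struct MemAttrListener {
        /* offset/size describe the converted range within the region;
         * to_private is the new attribute */
        void (*convert_mem_attr)(MemAttrListener *l, uint64_t offset,
                                 uint64_t size, bool to_private);
        MemAttrListener *next;
    };

    static MemAttrListener *listeners;

    static void mem_attr_listener_register(MemAttrListener *l)
    {
        l->next = listeners;
        listeners = l;
    }

    /* Called on the private<->shared conversion path to fan the event
     * out to every registered component. */
    static void notify_convert_mem_attr(uint64_t offset, uint64_t size,
                                        bool to_private)
    {
        for (MemAttrListener *l = listeners; l; l = l->next) {
            l->convert_mem_attr(l, offset, size, to_private);
        }
    }

    /* A VFIO-like consumer: unmap DMA on conversion to private,
     * re-establish it on conversion to shared. */
    static void vfio_convert_mem_attr(MemAttrListener *l, uint64_t offset,
                                      uint64_t size, bool to_private)
    {
        printf("vfio: %s DMA mapping for [0x%llx, +0x%llx)\n",
               to_private ? "remove" : "establish",
               (unsigned long long)offset, (unsigned long long)size);
    }

    int main(void)
    {
        MemAttrListener vfio = { .convert_mem_attr = vfio_convert_mem_attr };

        mem_attr_listener_register(&vfio);
        notify_convert_mem_attr(0x100000, 0x1000, true); /* shared -> private */
        return 0;
    }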
Chenyi Qiang
47e1b66b83 hw/vfio: extract the generic part of vfio populate and discard
So that it can be reused by other memory listeners in a subsequent
patch.

Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
2023-12-13 01:31:09 +00:00
Isaku Yamahata
e7f81c6bc4 i386/tdx: Get tdx module metadata from sysfs
The TDX module has configuration metadata which provides information about
the configuration of guest TDs. That information is needed to create a
guest. As it is provided via sysfs, get it from there.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
2023-11-15 00:57:11 -05:00
Xiaoyao Li
17295641a9 [DEBUG] tdx: print tdx-caps
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:11 -05:00
Xiaoyao Li
7b359c96b6 [DEBUG] i386/cpuid: Print CPUID entries
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:11 -05:00
Xiaoyao Li
07ead2d2c5 [DEBUG] i386/tdx: print tdx_ram_entry
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:11 -05:00
Xiaoyao Li
f9a3f66b2f [DEBUG] Add pmemclear() to clear a specified gpa range
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:11 -05:00
Chenyi Qiang
c46e888723 target/i386: add SPR-TDX CPU model
Add a new CPU model named SapphireRapids-TDX. It is a CPUID template for
TDX on the SPR platform. Some CPU info, like the FMS and model id, is
retrieved from a stepping E SPR SDP.

The listed features all belong to the TDX-supported CPUID bits, including:
* Natively supported ones.
* TDX fixed1 features.
* Configurable (if native) && KVM-supported features (if not supported
by KVM, exposing them unconditionally may bring unexpected behavior).

In addition, the legacy KVM-specific default features do not fit the
TDX CPU model, so skip that assignment in x86_cpu_load_model.

Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
2023-11-15 00:57:10 -05:00
Chenyi Qiang
09fd0bf9c3 i386/cpu: don't filter supported migratable feature if TDX VM
Some native CPUID features would be masked off because they don't have a
feature name in feature_word_info. To avoid misalignment with the native
value seen in the TD guest, skip the migratable-feature filter in the TDX
VM case.

Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
2023-11-15 00:57:10 -05:00
Chenyi Qiang
df1468fb16 i386/cpu: Don't clear CPUID_PDCM for TD guest
tdx_fixed0/1 only tracks the settings in env->features[]. cpu_x86_cpuid()
is called to construct cpuid_data and does some adjustments based on
env->features, which could break the existing tdx_fixed0/1.

Avoid clearing CPUID_PDCM when the PMU is disabled and TDX is enabled.

Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
2023-11-15 00:57:10 -05:00
Chenyi Qiang
538c56df2c i386/tdx: Check if the feature is allowed to be removed by "-feature_name"
In TDX, there are two types of features that cannot be removed with the
"-feature_name" QEMU command line parameter:

1. the fixed1 features;

2. the XFAM-controlled features;

Note, type 2, the XFAM-controlled features, is special in that usually one
XFAM bit controls a set of related features. For example, XFAM[2]
controls the enabling of avx, avx2, vaes, etc. Even though avx, avx2 and
vaes are not fixed1, they cannot be removed if their controlling bit
XFAM[2] is configured to 1. For simplicity, mark all XFAM-controlled
features as disallowed-minus except the controlling feature.

Print a warning and forbid the removal when "-cpu xxx,-feature_name" is
used to remove a disallowed-minus feature.

Co-developed-by: Chenyi Qiang <chenyi.qiang@intel.com>
Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
---

@chenyi, tdx_xfam_representative[] can be replaced by
x86_ext_save_areas[]. But I'm unsure about the side effects of enabling
more bits in x86_ext_save_areas[], especially for Intel PT. I'm also
unsure whether any additional work is needed beyond completing
x86_ext_save_areas[].

Please help explore this direction.
2023-11-15 00:57:10 -05:00
Xiaoyao Li
8f611be7d1 i386/cpu: Expose feature_word_description() for TDX
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:10 -05:00
Chenyi Qiang
7795e619c1 i386/tdx: Trim the XFAM-controlled features according to XFAM setting
XFAM (eXtended Features Available Mask) is defined as a 64-bit bitmap,
which uses the state-component bitmap format, the same as XCR0 or the
IA32_XSS MSR. XFAM determines the set of extended features available for
use by the guest TD.

In TDX 1.0, the enumeration of XSAVE-related features is controlled by
their corresponding XFAM bits. I.e., all AVX512 features are controlled
by XFAM[7:5]: if XFAM[7:5] is 000b, then avx512f, avx512dq,
avx512ifma, etc. are 0.

Since the XFAM setting is passed to KVM via CPUID leaf 0xD (specifically,
leaves FEAT_XSAVE_XCR0_LO, FEAT_XSAVE_XCR0_HI, FEAT_XSAVE_XSS_LO and
FEAT_XSAVE_XSS_HI), apply the XFAM dependencies after the xsave components
are finalized.

Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:10 -05:00
Xiaoyao Li
cdcfd99eeb i386/cpu: Expose mark_unavailable_features() for TDX
Expose mark_unavailable_features() out of cpu.c so that it can be used
by TDX when features are masked off.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:10 -05:00
Xiaoyao Li
73a294ae4d i386/cpu: Split user_features into user_plus_features and user_minus_features
Split user_features into two arrays. In x86_cpu_expand_features(), the
feature minuses are applied after the feature pluses, so minus takes
precedence when users specify "+feature,-feature" at the same time. As a
result, the split user_minus_features[] has precedence over
user_plus_features[].

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:10 -05:00
Xiaoyao Li
0d71cc2f71 i386/tdx: Implement debug property
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:10 -05:00
Isaku Yamahata
56b53ab620 [*** HACK ***] Make the default log mask to LOG_GUEST_ERROR from 0
When debugging with TDG.VP.VMCALL<ReportFatalError>, "-d guest_errors"
needs to be passed to set the log mask to LOG_GUEST_ERROR to see the
message in the QEMU log.  Because it's hard to find the option, make it
enabled by default for debugging.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
2023-11-15 00:57:10 -05:00
Isaku Yamahata
322a451cbb [*** TRACE ***] fw_cfg/trace: add trace point for fw_cfg_dma_transfer() for debug
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
2023-11-15 00:57:10 -05:00
Isaku Yamahata
dfad5ec55c [*** TRACE ***] i386/tdx/trace: Add trace point for TDG.VP.VMCALL for debug
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
2023-11-15 00:57:10 -05:00
Xiaoyao Li
40508ac9d4 [*** TRACE ***] i386/trace: Add trace for kvm_tdx_init_mem_region()
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:10 -05:00
Isaku Yamahata
efdafa16bf q35: introduce pam property to enable/disable PAM support
Introduce the has_pam_memory_area property to enable/disable the PAM
feature on Q35, and disable PAM for TD guests.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
2023-11-15 00:57:10 -05:00
Isaku Yamahata
1eda2696c1 q35/mch: add property txt-locked to mch for TXT-LOCKED register
MCH has registers of a locked type which can be locked by TXT and the
D_LCK bit.
Add a property, "txt-locked", to emulate TXT-locked registers, and
initialize those locked registers as if they had been initialized by
firmware. Only PCIEXBAR and PAM have TXT-locked registers; QEMU doesn't
emulate the other TXT-locked registers.

Use "-global mch.txt-locked=on" to enable this feature.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
2023-11-15 00:57:10 -05:00
Sean Christopherson
8f9584730b i386/tdx: Force x2apic mode and routing for TDs
TDX requires x2apic and "resets" vCPUs to have x2apic enabled.  Model
this in QEMU and unconditionally enable x2apic interrupt routing.

This fixes issues where interrupts from IRQFD would not get forwarded to
the guest due to KVM silently dropping the invalid routing entry.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
2023-11-15 00:57:10 -05:00
Xiaoyao Li
c3bb75a866 Revert "i386/tdx: Limit the range size for MapGPA"
This reverts commit 59cf75120051a9a8e6c7cc15312b8b6c513d5b08.

Because the handling of partial MapGPA has not been merged in TDVF and
the Linux TD guest.
2023-11-15 00:57:10 -05:00
Xiaoyao Li
4b20fcabaa [REVERTME] linux/kvm.h: Bump KVM_EXIT_MEMORY_FAULT & KVM_EXIT_TDX
To match the kvm-upstream-workaround branch.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:10 -05:00
Xiaoyao Li
f628abf930 [REVERTME] linux-headers/kvm: Bump up KVM_CAP_* that added newly for TDX
KVM_CAP_MEMORY_ATTRIBUTES, KVM_CAP_USER_MEMORY2, KVM_CAP_GUEST_MEMFD
and KVM_CAP_VM_TYPES are newly added for TDX.

The KVM side bumps them to avoid rebase conflicts, so bump them in QEMU to
align with the kvm-* branch.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:10 -05:00
Xiaoyao Li
d20f93da31 docs: Add TDX documentation
Add docs/system/i386/tdx.rst for TDX support, and add tdx to
confidential-guest-support.rst.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>

---
Changes since v1:
 - Add prerequisite of private gmem;
 - update example command to launch TD;

Changes since RFC v4:
 - add the restriction that kernel-irqchip must be split
2023-11-15 00:57:10 -05:00
Sean Christopherson
18f152401f i386/tdx: Don't get/put guest state for TDX VMs
Don't get/put state of TDX VMs since accessing/mutating guest state of
production TDs is not supported.

Note, it will be allowed for a debug TD. Corresponding support will be
introduced when debug TD support is implemented in the future.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
2023-11-15 00:57:10 -05:00
Xiaoyao Li
5c8d76a6e4 i386/tdx: Skip kvm_put_apicbase() for TDs
KVM doesn't allow writing to MSR_IA32_APICBASE for TDs.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
2023-11-15 00:57:10 -05:00
Xiaoyao Li
f226ab5a34 i386/tdx: Only configure MSR_IA32_UCODE_REV in kvm_init_msrs() for TDs
For TDs, only MSR_IA32_UCODE_REV in kvm_init_msrs() can be configured
by the VMM; the features enumerated/controlled by the other MSRs in
kvm_init_msrs() are not under the VMM's control.

Only configure MSR_IA32_UCODE_REV for TDs.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
2023-11-15 00:57:10 -05:00
Isaku Yamahata
e954cd4b22 i386/tdx: Don't synchronize guest tsc for TDs
The TSC of TDs is not accessible and KVM doesn't allow access to
MSR_IA32_TSC for TDs. To avoid the assert() in kvm_get_tsc(), make
kvm_synchronize_all_tsc() a no-op for TDs.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Reviewed-by: Connor Kuehl <ckuehl@redhat.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
2023-11-15 00:57:10 -05:00
Isaku Yamahata
2dbde0df24 hw/i386: add option to forcibly report edge trigger in acpi tables
When level-triggered interrupts aren't supported on the x86 platform,
forcibly report edge triggering in the ACPI tables.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
2023-11-15 00:57:10 -05:00
Xiaoyao Li
9faa2b03a9 hw/i386: add eoi_intercept_unsupported member to X86MachineState
Add a new bool member, eoi_intercept_unsupported, to X86MachineState with
a default value of false. Set it to true for TDX VMs.

The inability to intercept EOI makes it impossible to emulate the
re-injection of a level-triggered interrupt while the level is still held
active, which affects interrupt controller emulation.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
2023-11-15 00:57:10 -05:00
Xiaoyao Li
d04ea558aa i386/tdx: LMCE is not supported for TDX
LMCE is not supported for TDX since KVM doesn't provide emulation for
MSR_IA32_FEAT_CTL.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:10 -05:00
Xiaoyao Li
378e61b145 i386/tdx: Don't allow system reset for TDX VMs
TDX CPU state is protected and thus vCPU state can't be reset by the VMM.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
2023-11-15 00:57:10 -05:00
Xiaoyao Li
fdd53e1a81 i386/tdx: Disable PIC for TDX VMs
The legacy PIC (8259) cannot be supported for TDX VMs since the TDX module
doesn't allow direct interrupt injection.  Using posted interrupts for the
PIC is not a viable option as the guest BIOS/kernel will not do EOI for
PIC IRQs, i.e. will leave the vIRR bit set.

Hence disable the PIC for TDX VMs and error out if the user wants a PIC.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
2023-11-15 00:57:10 -05:00
Xiaoyao Li
e1ce29aa33 i386/tdx: Disable SMM for TDX VMs
TDX doesn't support SMM and VMM cannot emulate SMM for TDX VMs because
VMM cannot manipulate TDX VM's memory.

Disable SMM for TDX VMs and error out if the user requests to enable SMM.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
2023-11-15 00:57:10 -05:00
Isaku Yamahata
ffe9a6dccb q35: Introduce smm_ranges property for q35-pci-host
Add a q35 property to check whether or not SMM ranges, e.g. SMRAM, TSEG,
etc... exist for the target platform.  TDX doesn't support SMM and doesn't
play nice with QEMU modifying related guest memory ranges.

Signed-off-by: Isaku Yamahata <isaku.yamahata@linux.intel.com>
Co-developed-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:10 -05:00
Isaku Yamahata
7163da7797 pci-host/q35: Move PAM initialization above SMRAM initialization
In mch_realize(), process PAM initialization before SMRAM initialization
so that a later patch can skip everything SMRAM-related with a single check.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:10 -05:00
Xiaoyao Li
a5368c6202 i386/tdx: Wire TDX_REPORT_FATAL_ERROR with GuestPanic facility
Integrate TDX's TDX_REPORT_FATAL_ERROR into the QEMU GuestPanic facility.

Originated-from: Isaku Yamahata <isaku.yamahata@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
---
Changes from v2:
- Add documentation of the new type and struct (Daniel)
- Refine the error message handling (Daniel)
2023-11-15 00:57:10 -05:00
Xiaoyao Li
f597afe309 i386/tdx: Handle TDG.VP.VMCALL<REPORT_FATAL_ERROR>
A TD guest can use TDG.VP.VMCALL<REPORT_FATAL_ERROR> to request
termination with an error message encoded in GPRs.

Parse and print the error message, and terminate the TD guest in the
handler.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:10 -05:00
Isaku Yamahata
a23e29229e i386/tdx: Limit the range size for MapGPA
If the range for TDG.VP.VMCALL<MapGPA> is too large, process only a
limited size and return a retry error.  It's bad for the VMM to take too
long, e.g. on the order of seconds, while blocking vCPU execution; it
results in too many missed timer interrupts.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:10 -05:00
Isaku Yamahata
addb6b676d i386/tdx: handle TDG.VP.VMCALL<MapGPA> hypercall
MapGPA is a hypercall to convert a GPA between private and shared.
As the conversion function is already implemented as kvm_convert_memory,
wire it up to the TDX hypercall exit.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:10 -05:00
Chenyi Qiang
824f58fdba i386/tdx: setup a timer for the qio channel
To avoid waiting forever on the QGS server, set up a timer for the
transaction. On timeout, treat it as an error and interrupt the guest.
Define the threshold as 30s for now; it may be changed if that proves
inappropriate.

Extract the common cleanup code to make this clearer.

Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
---
Changes in v3:
 - Use t->timer_armed to track if t->timer is initialized;
2023-11-15 00:57:10 -05:00
Isaku Yamahata
9955e91601 i386/tdx: handle TDG.VP.VMCALL<GetQuote>
For GetQuote, delegate the request to the Quote Generation Service (QGS).
Add a property "quote-generation-socket" to tdx-guest, which is a property
of type SocketAddress, to specify the QGS.

On request, connect to the QGS, read the request buffer from shared guest
memory, send the request buffer to the server, store the response into
shared guest memory and notify the TD guest by interrupt.

command line example:
  qemu-system-x86_64 \
    -object '{"qom-type":"tdx-guest","id":"tdx0","quote-generation-socket":{"type": "vsock", "cid":"2","port":"1234"}}' \
    -machine confidential-guest-support=tdx0

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Co-developed-by: Chenyi Qiang <chenyi.qiang@intel.com>
Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
---
Changes in v3:
- rename property "quote-generation-service" to "quote-generation-socket";
- change the type of "quote-generation-socket" from str to
  SocketAddress;
- squash next patch into this one;
2023-11-15 00:57:10 -05:00
Isaku Yamahata
80417c539e i386/tdx: handle TDG.VP.VMCALL<SetupEventNotifyInterrupt>
For SetupEventNotifyInterrupt, record the interrupt vector and the APIC ID
of the vCPU that received this TDVMCALL.

Later, an interrupt with the given vector can be injected into the
specific vCPU that received SetupEventNotifyInterrupt.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:10 -05:00
Xiaoyao Li
511b09205f i386/tdx: Finalize TDX VM
Invoke KVM_TDX_FINALIZE_VM to finalize the TD's measurement and make
the TD vCPUs runnable once machine initialization is complete.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
2023-11-15 00:57:10 -05:00
Xiaoyao Li
85a8152691 i386/tdx: Call KVM_TDX_INIT_VCPU to initialize TDX vcpu
A TDX vCPU needs to be initialized by SEAMCALL(TDH.VP.INIT), and KVM
provides the vCPU-level ioctl KVM_TDX_INIT_VCPU for it.

KVM_TDX_INIT_VCPU needs the address of the HOB as input. Invoke it for
each vCPU after the HOB list is created.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
2023-11-15 00:57:10 -05:00
Chao Peng
08c095f67a i386/tdx: register TDVF as private memory
Allocate private guest memfd memory for the BIOS if it's a TD VM.

Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
Co-developed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:10 -05:00
Xiaoyao Li
d8da77238e memory: Introduce memory_region_init_ram_guest_memfd()
Introduce memory_region_init_ram_guest_memfd() to allocate a private
guest memfd at MemoryRegion initialization. It's for the TDVF use case,
which must be private in the TDX case.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:10 -05:00
Isaku Yamahata
ab857b85a5 i386/tdx: Add TDVF memory via KVM_TDX_INIT_MEM_REGION
TDVF firmware (CODE and VARS) needs to be added/copied to the TD's private
memory via KVM_TDX_INIT_MEM_REGION, as are the TD HOB and TEMP memory.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>

---
Changes in v1:
  - rename variable @metadata to @flags
2023-11-15 00:57:10 -05:00
Xiaoyao Li
517fb00637 i386/tdx: Setup the TD HOB list
The TD HOB list is used to pass information from the VMM to TDVF. The TD
HOB must include the PHIT HOB and Resource Descriptor HOBs. More details
can be found in the TDVF specification and the PI specification.

Build the TD HOB in TDX's machine_init_done callback.

Co-developed-by: Isaku Yamahata <isaku.yamahata@intel.com>
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Co-developed-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>

---
Changes in v1:
  - drop the code of adding mmio resources since OVMF prepares all the
    MMIO hob itself.
2023-11-15 00:57:10 -05:00
Xiaoyao Li
359be44b10 headers: Add definitions from UEFI spec for volumes, resources, etc...
Add UEFI definitions for literals, enums, structs, GUIDs, etc... that
will be used by TDX to build the UEFI Hand-Off Block (HOB) that is passed
to the Trusted Domain Virtual Firmware (TDVF).

All values come from the UEFI specification [1], PI spec [2] and TDVF
design guide[3].

[1] UEFI Specification v2.10 https://uefi.org/sites/default/files/resources/UEFI_Spec_2_10_Aug29.pdf
[2] UEFI PI spec v1.8 https://uefi.org/sites/default/files/resources/UEFI_PI_Spec_1_8_March3.pdf
[3] https://software.intel.com/content/dam/develop/external/us/en/documents/tdx-virtual-firmware-design-guide-rev-1.pdf

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
2023-11-15 00:57:10 -05:00
Xiaoyao Li
0222b6f05d i386/tdx: Track RAM entries for TDX VM
The RAM of a TDX VM can be classified into two types:

 - TDX_RAM_UNACCEPTED: the default type of TDX memory, which needs to be
   accepted by the TDX guest before it can be used and will be all-zeros
   after being accepted.

 - TDX_RAM_ADDED: the RAM that is ADD'ed to the TD guest before running
   and can be used directly, e.g., the TD HOB and TEMP MEM needed by TDVF.

Maintain TdxRamEntries[], which grabs the initial RAM info from the e820
table and marks each RAM range with the default type TDX_RAM_UNACCEPTED.

Then turn the ranges of the TD HOB and TEMP MEM into TDX_RAM_ADDED, since
these ranges will be ADD'ed before the TD runs and don't need to be
accepted at runtime.

The TdxRamEntries[] are later used to set up the TD resource HOBs that
pass the memory info from QEMU to TDVF.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
---
Changes in v3:
- use enum TdxRamType in struct TdxRamEntry; (Isaku)
- Fix the indention; (Daniel)

Changes in v1:
  - simplify the algorithm of tdx_accept_ram_range() (Suggested-by: Gerd Hoffmann)
    (1) Change the existing entry to cover the accepted ram range.
    (2) If there is room before the accepted ram range, add a
        TDX_RAM_UNACCEPTED entry for that.
    (3) If there is room after the accepted ram range, add a
        TDX_RAM_UNACCEPTED entry for that.
    (a standalone sketch of this algorithm follows this entry)
2023-11-15 00:57:10 -05:00
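
A standalone C sketch of the simplified tdx_accept_ram_range() algorithm from the changelog above; bounds checks and QEMU's actual types are omitted, so treat it as illustrative only:

    #include <stdint.h>
    #include <stdio.h>

    enum TdxRamType { TDX_RAM_UNACCEPTED, TDX_RAM_ADDED };

    struct TdxRamEntry {
        uint64_t address;
        uint64_t length;
        enum TdxRamType type;
    };

    static struct TdxRamEntry ram_entries[16];  /* bounds checks omitted */
    static int nr_ram_entries;

    /* Mark [address, address + length) as TDX_RAM_ADDED inside the single
     * unaccepted entry covering it, splitting off leftover head/tail ranges
     * as new TDX_RAM_UNACCEPTED entries. */
    static int tdx_accept_ram_range(uint64_t address, uint64_t length)
    {
        for (int i = 0; i < nr_ram_entries; i++) {
            struct TdxRamEntry *e = &ram_entries[i];

            if (e->type != TDX_RAM_UNACCEPTED ||
                address < e->address ||
                address + length > e->address + e->length) {
                continue;
            }

            uint64_t head = address - e->address;
            uint64_t tail = (e->address + e->length) - (address + length);

            /* (1) the existing entry now covers exactly the accepted range */
            *e = (struct TdxRamEntry){ address, length, TDX_RAM_ADDED };
            /* (2) room before the accepted range */
            if (head) {
                ram_entries[nr_ram_entries++] = (struct TdxRamEntry){
                    address - head, head, TDX_RAM_UNACCEPTED };
            }
            /* (3) room after the accepted range */
            if (tail) {
                ram_entries[nr_ram_entries++] = (struct TdxRamEntry){
                    address + length, tail, TDX_RAM_UNACCEPTED };
            }
            return 0;
        }
        return -1;  /* no unaccepted entry covers the range */
    }

    int main(void)
    {
        ram_entries[nr_ram_entries++] =
            (struct TdxRamEntry){ 0, 0x80000000, TDX_RAM_UNACCEPTED };
        tdx_accept_ram_range(0x800000, 0x20000);  /* e.g. TD HOB + TEMP MEM */
        for (int i = 0; i < nr_ram_entries; i++) {
            printf("0x%09llx +0x%09llx %s\n",
                   (unsigned long long)ram_entries[i].address,
                   (unsigned long long)ram_entries[i].length,
                   ram_entries[i].type == TDX_RAM_ADDED ? "added"
                                                        : "unaccepted");
        }
        return 0;
    }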
Xiaoyao Li
8b7763f7ca i386/tdx: Track mem_ptr for each firmware entry of TDVF
For each TDVF section, QEMU needs to copy the content to guest
private memory via the KVM API (KVM_TDX_INIT_MEM_REGION).

Introduce a field @mem_ptr in TdxFirmwareEntry to track the memory
pointer of each TDVF section, so that QEMU can add/copy them to guest
private memory later.

TDVF sections can be classified into two groups:
 - The firmware itself, e.g., TDVF BFV and CFV, which is located
   separately from guest RAM; its memory pointer is the bios pointer.

 - Sections located in guest RAM, e.g., TEMP_MEM and TD_HOB;
   mmap a new memory range for them.

Register a machine_init_done callback to do this work.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
2023-11-15 00:57:10 -05:00
Xiaoyao Li
33edcc8426 i386/tdx: Don't initialize pc.rom for TDX VMs
For TDX, the addresses below 1MB are entirely general RAM. There is no
need to initialize the pc.rom memory region for TDs.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
---
This is more of a workaround for the issue that, for the q35 machine type,
the real memslot update (which requires memslot deletion) for pc.rom
happens after tdx_init_memory_region. It leads to the private memory
ADD'ed before that getting lost. I haven't worked out a good solution to
resolve the ordering issue, so just skip the pc.rom setup to avoid the
memslot deletion.
2023-11-15 00:57:10 -05:00
Xiaoyao Li
19195c88df i386/tdx: Skip BIOS shadowing setup
TDX doesn't support mapping different GPAs to the same private memory.
Thus, aliasing the top 128KB of the BIOS as isa-bios is not supported.

On the other hand, a TDX guest cannot enter real mode, so it works fine
without isa-bios.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
---
Changes in v1:
 - update commit message and comment to clarify
2023-11-15 00:57:10 -05:00
Xiaoyao Li
954b9e7a42 i386/tdx: Parse TDVF metadata for TDX VM
TDX cannot support the pflash device since it supports neither read-only
memslots nor emulation. Load TDVF (OVMF) with the -bios option for TDs.

When booting a TD, besides loading TDVF at the address below 4G, QEMU
needs to parse the TDVF metadata.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
2023-11-15 00:57:10 -05:00
Isaku Yamahata
5aad9329d2 i386/tdvf: Introduce function to parse TDVF metadata
A TDX VM needs to boot with its specialized firmware, the Trusted Domain
Virtual Firmware (TDVF). QEMU needs to parse TDVF and map it into TD
guest memory prior to running the TDX VM.

The TDVF metadata in the TDVF image describes the structure of the
firmware. QEMU refers to it to set up memory for TDVF. Introduce the
function tdvf_parse_metadata() to parse the metadata from the TDVF image
and store the info of each TDVF section.

The TDX metadata is located by a TDX metadata offset block, which is a
GUID-ed structure. The data portion of the GUID structure contains only a
4-byte field that is the offset of the TDX metadata from the end of the
firmware file (a standalone sketch of that last step follows this entry).

Select X86_FW_OVMF when TDX is enabled to leverage the existing functions
to parse and search OVMF's GUID-ed structures.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Co-developed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>

---
Changes in v1:
 - rename tdvf_parse_section_entry() to
   tdvf_parse_and_check_section_entry()
Changes in RFC v4:
 - rename TDX_METADATA_GUID to TDX_METADATA_OFFSET_GUID
2023-11-15 00:57:10 -05:00
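
A hedged sketch of the last step described above: once the GUID-ed offset block has been located (QEMU reuses its OVMF GUID-table parsing for that), its 4-byte little-endian payload gives the distance of the metadata from the end of the firmware image. The helper name is hypothetical, and a little-endian host is assumed:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* guid_data is the 4-byte payload of the TDX metadata offset block;
     * returns a pointer to the metadata inside the image, or NULL. */
    static const uint8_t *tdvf_locate_metadata(const uint8_t *image,
                                               size_t image_size,
                                               const uint8_t guid_data[4])
    {
        uint32_t offset_from_end;

        memcpy(&offset_from_end, guid_data, sizeof(offset_from_end));
        if (offset_from_end == 0 || offset_from_end > image_size) {
            return NULL;
        }
        return image + image_size - offset_from_end;
    }

    int main(void)
    {
        uint8_t image[4096] = { 0 };
        uint32_t off = 512;              /* metadata 512 bytes from the end */
        uint8_t guid_data[4];

        memcpy(guid_data, &off, sizeof(off));
        const uint8_t *meta = tdvf_locate_metadata(image, sizeof(image),
                                                   guid_data);
        printf("metadata at image offset %ld\n",
               meta ? (long)(meta - image) : -1L);
        return 0;
    }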
Isaku Yamahata
583aae3302 kvm/tdx: Ignore memory conversion to shared of unassigned region
TDX requires vMMIO regions to be shared.  For KVM, an MMIO region is a
region to which no kvm memslot is assigned (except for in-kernel
emulation).  QEMU has the memory region for vMMIO at each device level.

While OVMF conservatively issues MapGPA(to-shared) on the 32-bit PCI MMIO
region, QEMU doesn't find a corresponding vMMIO region because this
happens before PCI device allocation, and memory_region_find() finds the
device region, not the PCI bus region.  It's safe to ignore
MapGPA(to-shared) because when the guest accesses those regions it uses a
GPA with the shared bit set for vMMIO.  Ignore memory conversion requests
to shared for unassigned regions and return success; otherwise OVMF gets
confused and panics there.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:10 -05:00
Isaku Yamahata
80af3f2547 kvm/tdx: Don't complain when converting vMMIO region to shared
Because a vMMIO region needs to be a shared region, the guest TD may
explicitly convert such a region from private to shared.  Don't complain
about such conversions.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:10 -05:00
Xiaoyao Li
ef90193248 i386/tdx: Make memory type private by default
By default (due to the recent UPM change), the restricted memory attribute
is shared.  Convert the memory region from shared to private at memory
slot creation time.

Add a kvm region-registering function to check the flag and convert the
region, and add a memory listener to the TDX guest code to set the flag
on the relevant memory regions.

Without this patch:
- Secure-EPT violation on the private area
- KVM_MEMORY_FAULT exit (kvm -> qemu)
- qemu converts the 4K page from shared to private
- Resume vCPU execution
- Secure-EPT violation again
- KVM resolves the EPT violation
This also prevents huge pages, because page conversion is done at 4K
granularity.  Although it's possible to merge 4K private mappings into a
2M large page, it slows guest boot.

With this patch:
- After memory slot creation, the region is converted from shared to private
- Secure-EPT violation on the private area
- KVM resolves the EPT violation

Originated-from: Isaku Yamahata <isaku.yamahata@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:10 -05:00
Xiaoyao Li
769b8158f6 kvm/memory: Introduce the infrastructure to set the default shared/private value
Introduce a new flag, RAM_DEFAULT_PRIVATE, for RAMBlock. It's used to
indicate the default attribute, private or not.

Set the RAM range to private explicitly when it defaults to private.

Originated-from: Isaku Yamahata <isaku.yamahata@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:10 -05:00
Xiaoyao Li
d7db7d2ea1 i386/tdx: Set kvm_readonly_mem_enabled to false for TDX VM
TDX only supports read-only for shared memory but not for private memory.

From QEMU's point of view, it has no idea whether a memslot is used as
shared or private memory. Thus just set kvm_readonly_mem_enabled to false
for TDX VMs for simplicity.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
2023-11-15 00:57:09 -05:00
Xiaoyao Li
004db71f60 i386/tdx: Implement user specified tsc frequency
Reuse "-cpu,tsc-frequency=" to get user wanted tsc frequency and call VM
scope VM_SET_TSC_KHZ to set the tsc frequency of TD before KVM_TDX_INIT_VM.

Besides, sanity check the tsc frequency to be in the legal range and
legal granularity (required by TDX module).

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
---
Changes in v3:
- use @errp to report error info; (Daniel)

Changes in v1:
- Use the VM-scope KVM_SET_TSC_KHZ to set the TSC frequency of the TD,
  since the KVM side dropped the @tsc_khz field in struct kvm_tdx_init_vm
2023-11-15 00:57:09 -05:00
Isaku Yamahata
bbe50409ce i386/tdx: Allows mrconfigid/mrowner/mrownerconfig for TDX_INIT_VM
Three sha384 hash values, mrconfigid, mrowner and mrownerconfig, of a TD
can be provided for TDX attestation.

So far they were hard-coded as 0. Now allow the user to specify those
values via the properties mrconfigid, mrowner and mrownerconfig. They are
all in base64 format.

Example:
-object tdx-guest, \
  mrconfigid=ASNFZ4mrze8BI0VniavN7wEjRWeJq83vASNFZ4mrze8BI0VniavN7wEjRWeJq83v,\
  mrowner=ASNFZ4mrze8BI0VniavN7wEjRWeJq83vASNFZ4mrze8BI0VniavN7wEjRWeJq83v,\
  mrownerconfig=ASNFZ4mrze8BI0VniavN7wEjRWeJq83vASNFZ4mrze8BI0VniavN7wEjRWeJq83v

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Co-developed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
---
Changes in v3:
 - use base64 encoding instead of hex strings;
2023-11-15 00:57:09 -05:00
Xiaoyao Li
2ac24a3f82 i386/tdx: Validate TD attributes
Validate the TD attributes against tdx_caps: fixed-0 bits must be zero and
fixed-1 bits must be set.

Besides, sanity-check the attribute bits that are not yet supported by
QEMU, e.g., the debug bit; it will be allowed in the future when debug TD
support lands in QEMU.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
---
Changes in v3:
- using error_setg() for error report; (Daniel)
2023-11-15 00:57:09 -05:00
Xiaoyao Li
7671a8d293 i386/tdx: Wire CPU features up with attributes of TD guest
For QEMU VMs, PKS is configured via CPUID_7_0_ECX_PKS and the PMU is
configured by x86cpu->enable_pmu. Reuse the existing configuration
interface for TDX VMs.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
2023-11-15 00:57:09 -05:00
Isaku Yamahata
5bf04c14d8 i386/tdx: Make sept_ve_disable set by default
For the TDX KVM use case, the Linux guest is the major one, and it
requires sept_ve_disable to be set.  Make that the default for the main
use case; for other use cases, it can be enabled/disabled via the QEMU
command line.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
2023-11-15 00:57:09 -05:00
Xiaoyao Li
f503878704 i386/tdx: Add property sept-ve-disable for tdx-guest object
Bit 28 of the TD attributes is named SEPT_VE_DISABLE. When set to 1, it
disables the conversion of EPT violations to #VE on guest TD access of
PENDING pages.

Some guest OSes (e.g., the Linux TD guest) may require this bit to be 1
and otherwise refuse to boot.

Add the sept-ve-disable property to the tdx-guest object, for the user to
configure this bit.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
---
Changes in v3:
- update the comment of property @sept-ve-disable to make it more
  descriptive and use new format. (Daniel and Markus)
2023-11-15 00:57:09 -05:00
Xiaoyao Li
98f599ec0b i386/tdx: Initialize TDX before creating TD vcpus
Invoke KVM_TDX_INIT_VM in kvm_arch_pre_create_vcpu(); KVM_TDX_INIT_VM
configures global TD settings, e.g. the canonical CPUID config, and must
be executed prior to creating vCPUs.

Use kvm_x86_arch_cpuid() to set up the CPUID settings for the TDX VM.

Note, this doesn't address the fact that QEMU may change the CPUID
configuration when creating vCPUs, i.e. it punts on refactoring QEMU to
provide a stable CPUID config prior to kvm_arch_init().

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
---
Changes in v3:
- Pass @errp in tdx_pre_create_vcpu() and pass error info to it. (Daniel)
2023-11-15 00:57:09 -05:00
Xiaoyao Li
a1b994d89a kvm: Introduce kvm_arch_pre_create_vcpu()
Introduce kvm_arch_pre_create_vcpu() to perform arch-dependent work prior
to creating any vCPU. This is for i386 TDX because it needs to call
TDX_INIT_VM before creating any vCPU.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
---
Changes in v3:
- pass @errp to kvm_arch_pre_create_vcpu(); (Per Daniel)
2023-11-15 00:57:09 -05:00
Sean Christopherson
04fc588ea9 i386/kvm: Move architectural CPUID leaf generation to separate helper
Move the architectural (for lack of a better term) CPUID leaf generation
to a separate helper so that the generation code can be reused by TDX,
which needs to generate a canonical VM-scoped configuration.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:09 -05:00
Xiaoyao Li
7e454d2ca4 i386/tdx: Integrate tdx_caps->attrs_fixed0/1 to tdx_cpuid_lookup
Some bits in TD attributes have corresponding CPUID feature bits. Reflect
the fixed0/1 restriction on TD attributes to their corresponding CPUID
bits in tdx_cpuid_lookup[] as well.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:09 -05:00
Xiaoyao Li
b4a0470949 i386/tdx: Integrate tdx_caps->xfam_fixed0/1 into tdx_cpuid_lookup
KVM requires userspace to pass XFAM configuration via CPUID 0xD leaves.

Convert tdx_caps->xfam_fixed0/1 into the corresponding
tdx_cpuid_lookup[].tdx_fixed0/1 fields of the CPUID 0xD leaves. Thus the
requirement can be applied naturally.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:09 -05:00
Xiaoyao Li
5fdefc08b3 i386/tdx: Update tdx_cpuid_lookup[].tdx_fixed0/1 by tdx_caps.cpuid_config[]
tdx_cpuid_lookup[].tdx_fixed0/1 is QEMU-maintained data which reflects the
TDX restrictions regarding how some CPUIDs are virtualized by TDX.

It's retrieved from the TDX spec. However, TDX may change some fixed
fields to configurable in the future. Update the
tdx_cpuid_lookup[].tdx_fixed0/1 fields by removing the bits that are
reported by the TDX module as configurable. This adapts to an updated TDX
(module) automatically.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:09 -05:00
Xiaoyao Li
d54122cefb i386/tdx: Adjust the supported CPUID based on TDX restrictions
According to Chapter "CPUID Virtualization" in TDX module spec, CPUID
bits of TD can be classified into 6 types:

------------------------------------------------------------------------
1 | As configured | configurable by VMM, independent of native value;
------------------------------------------------------------------------
2 | As configured | configurable by VMM if the bit is supported natively;
    (if native)   | otherwise it equals native (0).
------------------------------------------------------------------------
3 | Fixed         | fixed to 0/1
------------------------------------------------------------------------
4 | Native        | reflect the native value
------------------------------------------------------------------------
5 | Calculated    | calculated by TDX module.
------------------------------------------------------------------------
6 | Inducing #VE  | get #VE exception
------------------------------------------------------------------------

Note:
1. All the configurable XFAM-related features and TD-attributes-related
   features fall into type #2, and the fixed0/1 bits of XFAM and the TD
   attributes fall into type #3.

2. For CPUID leaves not listed in the "CPUID Virtualization Overview"
   table in the TDX module spec, the TDX module injects #VE into TDs when
   those are queried. For this case, TDs can request CPUID emulation from
   the VMM via TDVMCALL and the values are fully controlled by the VMM.

Because the TDX module has its own virtualization policy on CPUID bits,
what is reported via KVM_GET_SUPPORTED_CPUID diverges from the supported
CPUID bits for TDs. In order to keep a consistent CPUID configuration
between the VMM and TDs, adjust the supported CPUID for TDs based on the
TDX restrictions.

Currently we only focus on the CPUID leaves recognized by QEMU's
feature_word_info[], which are indexed by a FeatureWord.

Introduce a TDX CPUID lookup table, which maintains one entry for each
FeatureWord. Each entry has the fields below:

 - tdx_fixed0/1: The bits that are fixed as 0/1;

 - vmm_fixup:   The bits that are configurable from the view of the TDX
                module, but require emulation by the VMM when configured
                as enabled. They are not supported if the VMM doesn't
                report them as supported, so they need to be fixed up by
                checking whether the VMM supports them.

 - inducing_ve: The TD gets #VE when querying this CPUID leaf. The result
                is totally configurable by the VMM.

 - supported_on_ve: Valid only when @inducing_ve is true. It represents
                    the maximum feature set that can be emulated for TDs.

By applying the TDX CPUID lookup table and the TDX capabilities reported
by the TDX module, the supported CPUID for TDs can be obtained via the
following steps:

- get the base of the VMM-supported feature set;

- if the leaf is not a FeatureWord, just return the VMM's value without
  modification;

- if the leaf is an inducing_ve type, apply the supported_on_ve mask and
  return;

- include all native bits; this covers types #2 and #4, and parts of
  type #1 (it also includes some unsupported bits; the following steps
  correct that);

- apply fixed0/1 to it (this covers #3, and rectifies the previous step);

- add the configurable bits (this covers the other part of type #1);

- fix up the bits in vmm_fixup;

- filter the result against the valid .supported fields;

(The Calculated type is ignored since it's determined at runtime. A
standalone sketch of these masking steps follows this entry.)

Co-developed-by: Chenyi Qiang <chenyi.qiang@intel.com>
Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:09 -05:00
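
A standalone sketch of the masking steps above for one feature word; the field names follow the commit text, but the function and its signature are illustrative, and the final per-word .supported filter is omitted:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef struct FeatureMask {
        uint64_t tdx_fixed0;        /* bits fixed to 0 */
        uint64_t tdx_fixed1;        /* bits fixed to 1 */
        uint64_t vmm_fixup;         /* bits needing VMM emulation */
        uint64_t supported_on_ve;   /* max emulatable set for #VE leaves */
        bool inducing_ve;
    } FeatureMask;

    static uint64_t tdx_adjust_word(uint64_t vmm_supported, uint64_t native,
                                    uint64_t configurable,
                                    const FeatureMask *m)
    {
        uint64_t v = vmm_supported;            /* base: VMM-supported set  */

        if (m->inducing_ve) {                  /* #VE leaf: VMM-emulated   */
            return v & m->supported_on_ve;
        }
        v |= native;                           /* types #2/#4, part of #1  */
        v &= ~m->tdx_fixed0;                   /* fixed-0 bits (type #3)   */
        v |= m->tdx_fixed1;                    /* fixed-1 bits (type #3)   */
        v |= configurable;                     /* rest of type #1          */
        v &= ~(m->vmm_fixup & ~vmm_supported); /* drop fixups VMM lacks    */
        return v;
    }

    int main(void)
    {
        FeatureMask m = { .tdx_fixed0 = 1ull << 1, .tdx_fixed1 = 1ull << 0 };

        printf("0x%llx\n", (unsigned long long)
               tdx_adjust_word(0xf0, 0x0f, 0x100, &m));
        return 0;
    }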
Xiaoyao Li
ef64621235 i386/tdx: Introduce is_tdx_vm() helper and cache tdx_guest object
TDX VMs will need special handling all around QEMU. Introduce the
is_tdx_vm() helper to query whether the current VM is a TDX VM.

Cache the tdx_guest object so there is no need to cast from ms->cgs every
time.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
---
changes in v3:
- replace object_dynamic_cast with TDX_GUEST();
2023-11-15 00:57:09 -05:00
Xiaoyao Li
575bfcd358 i386/tdx: Get tdx_capabilities via KVM_TDX_CAPABILITIES
KVM provides TDX capabilities via the sub-command KVM_TDX_CAPABILITIES of
ioctl(KVM_MEMORY_ENCRYPT_OP). Get the capabilities when initializing the
TDX context; they will be used to validate the user's settings later.

Since there is no interface reporting how many cpuid configs are contained
in KVM_TDX_CAPABILITIES, QEMU tries starting with a known number and
aborts when it exceeds KVM_MAX_CPUID_ENTRIES.

Besides, introduce the interfaces to invoke TDX "ioctls" at different
scopes (KVM, VM and VCPU) in preparation.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
---
Changes in v3:
- rename __tdx_ioctl() to tdx_ioctl_internal()
- Pass errp in get_tdx_capabilities();

changes in v2:
  - Make the error message more clear;

changes in v1:
  - start from nr_cpuid_configs = 6 for the loop;
  - stop the loop when nr_cpuid_configs exceeds KVM_MAX_CPUID_ENTRIES;
2023-11-15 00:57:09 -05:00
Xiaoyao Li
38b04243ce i386/tdx: Implement tdx_kvm_init() to initialize TDX VM context
Introduce tdx_kvm_init() and invoke it in kvm_confidential_guest_init()
if it's a TDX VM.

Set ms->require_guest_memfd to require kvm guest memfd allocation for any
memory backend. More TDX specific initialization will be added later.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
2023-11-15 00:57:09 -05:00
Xiaoyao Li
c64b4c4e28 target/i386: Introduce kvm_confidential_guest_init()
Introduce a separate function kvm_confidential_guest_init(), which
dispatches specific confidential guest initialization function by
ms->cgs type.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2023-11-15 00:57:09 -05:00
Xiaoyao Li
871c9f21ee target/i386: Parse TDX vm type
A TDX VM requires the VM type KVM_X86_TDX_VM to be passed to
kvm_ioctl(KVM_CREATE_VM).

If a tdx-guest object is specified as the confidential-guest-support,
like

  qemu -machine ...,confidential-guest-support=tdx0 \
       -object tdx-guest,id=tdx0,...

the VM type is parsed as KVM_X86_TDX_VM.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:09 -05:00
Xiaoyao Li
3770c1f6cb target/i386: Implement mc->kvm_type() to get VM type
Implement mc->kvm_type() for i386 machines. It provides a way for the user
to create a SW_PROTECTED_VM.

Also store the vm_type in MachineState so that other code can query what
the VM type is.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:09 -05:00
Xiaoyao Li
636758fe40 i386: Introduce tdx-guest object
Introduce the tdx-guest object, which implements the interface of
CONFIDENTIAL_GUEST_SUPPORT and will be used to create TDX VMs (TDs) by

  qemu -machine ...,confidential-guest-support=tdx0	\
       -object tdx-guest,id=tdx0

It has only one member, 'attributes', with a fixed value of 0; it is not
configurable so far.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
Acked-by: Markus Armbruster <armbru@redhat.com>
---
changes in v1
- make @attributes not user-settable
2023-11-15 00:57:09 -05:00
Xiaoyao Li
c9636b4bf5 *** HACK *** linux-headers: Update headers to pull in TDX API changes
Pull in the recent TDX updates, which are not backwards compatible.

This is just to make the series runnable. The headers will be regenerated
by the script

	scripts/update-linux-headers.sh

once TDX support is upstreamed in the Linux kernel.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:09 -05:00
Isaku Yamahata
d39ad1ccfd trace/kvm: Add trace for page conversion between shared and private
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:09 -05:00
Chao Peng
552a4e57a8 kvm: handle KVM_EXIT_MEMORY_FAULT
Currently only KVM_MEMORY_EXIT_FLAG_PRIVATE in flags is valid when
KVM_EXIT_MEMORY_FAULT happens. It indicates that userspace needs to do
the memory conversion on the RAMBlock to turn the memory into the desired
attribute, i.e., private/shared.

Note, KVM_EXIT_MEMORY_FAULT makes sense only when the RAMBlock has a
guest_memfd memory backend.

Note, KVM_EXIT_MEMORY_FAULT returns with -EFAULT, so special handling is
added (a standalone sketch follows this entry).

Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
Co-developed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:09 -05:00
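
A sketch of the handling described above, written against the guest_memfd-era linux/kvm.h ABI (KVM_EXIT_MEMORY_FAULT, KVM_MEMORY_EXIT_FLAG_PRIVATE and the memory_fault exit struct); the bumped headers in this branch may differ, and the conversion function is stubbed out here:

    #include <errno.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Stub standing in for the conversion path added earlier in the series. */
    static int kvm_convert_memory(uint64_t gpa, uint64_t size, bool to_private)
    {
        printf("convert gpa 0x%llx +0x%llx to %s\n",
               (unsigned long long)gpa, (unsigned long long)size,
               to_private ? "private" : "shared");
        return 0;
    }

    static int handle_vcpu_run(int vcpu_fd, struct kvm_run *run)
    {
        int ret = ioctl(vcpu_fd, KVM_RUN, 0);

        /* KVM_EXIT_MEMORY_FAULT is delivered together with -EFAULT, so the
         * error path must still inspect exit_reason instead of bailing out. */
        if (ret < 0 && errno != EFAULT) {
            return -errno;
        }
        if (run->exit_reason == KVM_EXIT_MEMORY_FAULT) {
            bool to_private =
                run->memory_fault.flags & KVM_MEMORY_EXIT_FLAG_PRIVATE;

            return kvm_convert_memory(run->memory_fault.gpa,
                                      run->memory_fault.size, to_private);
        }
        return ret;
    }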
Xiaoyao Li
105fa48cab physmem: Introduce ram_block_convert_range() for page conversion
It's used for discarding the now-stale opposite memory after a memory
conversion for a confidential guest (a standalone sketch follows this
entry).

When a page is converted from shared to private, the original shared
memory can be discarded via ram_block_discard_range().

When a page is converted from private to shared, the original private
memory is backed by guest_memfd. Introduce
ram_block_discard_guest_memfd_range() for discarding memory in
guest_memfd.

Originally-from: Isaku Yamahata <isaku.yamahata@intel.com>
Co-developed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:09 -05:00
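
A standalone sketch of the discard policy this commit describes; RAMBlock and the two discard functions named above are stubbed stand-ins for QEMU's real ones:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef struct RAMBlock RAMBlock;   /* stand-in for QEMU's type */

    static int ram_block_discard_range(RAMBlock *rb, uint64_t start,
                                       size_t len)
    {
        printf("discard shared (hva) copy: +0x%llx, 0x%zx bytes\n",
               (unsigned long long)start, len);
        return 0;
    }

    static int ram_block_discard_guest_memfd_range(RAMBlock *rb,
                                                   uint64_t start, size_t len)
    {
        printf("discard private (guest_memfd) copy: +0x%llx, 0x%zx bytes\n",
               (unsigned long long)start, len);
        return 0;
    }

    /* After a conversion, the copy holding the *old* attribute is discarded. */
    static int ram_block_convert_range(RAMBlock *rb, uint64_t start,
                                       size_t len, bool to_private)
    {
        return to_private ? ram_block_discard_range(rb, start, len)
                          : ram_block_discard_guest_memfd_range(rb, start, len);
    }

    int main(void)
    {
        return ram_block_convert_range(NULL, 0x100000, 0x1000, true);
    }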
Xiaoyao Li
9824769418 physmem: replace function name with __func__ in ram_block_discard_range()
Use __func__ to avoid hard-coded function name.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:09 -05:00
Xiaoyao Li
02fda3dd28 physmem: Relax the alignment check of host_startaddr in ram_block_discard_range()
Commit d3a5038c46 ("exec: ram_block_discard_range") introduced
ram_block_discard_range(), which grabs some code from
ram_discard_range(). However, during the code movement, it changed the
alignment check of host_startaddr from qemu_host_page_size to
rb->page_size.

When a RAMBlock is backed by hugepages, this requires the startaddr to be
huge-page aligned, which is overkill. E.g., TDX's private-shared page
conversion is done at 4KB granularity. A shared page is discarded when it
gets converted to private, and when the shared page is backed by a
hugepage, it is going to fail this check.

So change the alignment check back to qemu_host_page_size.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
---
Changes in v3:
 - Newly added in v3;
2023-11-15 00:57:09 -05:00
Xiaoyao Li
609e0ca1d8 kvm: Introduce support for memory_attributes
Introduce helper functions to set the attributes of a range of memory to
private or shared (a standalone sketch follows this entry).

This is necessary to notify KVM of the private/shared attribute of each
GPA range. KVM needs the information to decide whether a GPA needs to be
mapped to hva-based shared memory or guest_memfd-based private memory.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:09 -05:00
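
A sketch of such a helper against the guest_memfd-era linux/kvm.h (struct kvm_memory_attributes and KVM_MEMORY_ATTRIBUTE_PRIVATE, issued on the VM fd); the exact ABI in this branch's bumped headers may differ:

    #include <stdbool.h>
    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Clearing KVM_MEMORY_ATTRIBUTE_PRIVATE marks the range shared. */
    static int kvm_set_memory_attributes(int vm_fd, uint64_t gpa,
                                         uint64_t size, bool private)
    {
        struct kvm_memory_attributes attrs = {
            .address = gpa,
            .size = size,
            .attributes = private ? KVM_MEMORY_ATTRIBUTE_PRIVATE : 0,
        };

        return ioctl(vm_fd, KVM_SET_MEMORY_ATTRIBUTES, &attrs);
    }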
Chao Peng
d21132fe6e kvm: Enable KVM_SET_USER_MEMORY_REGION2 for memslot
Switch to KVM_SET_USER_MEMORY_REGION2 when supported by KVM.

With KVM_SET_USER_MEMORY_REGION2, QEMU can set up a memory region backed
both by hva-based shared memory and guest-memfd-based private memory
(a standalone sketch follows this entry).

Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
Co-developed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:09 -05:00
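
A sketch of filling one such memslot, using the upstream names (struct kvm_userspace_memory_region2, KVM_MEM_GUEST_MEMFD, KVM_SET_USER_MEMORY_REGION2); the in-flight series this branch tracks used slightly different spellings, e.g. KVM_MEM_PRIVATE:

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* One slot carries both the shared (hva) and private (guest_memfd)
     * side; KVM chooses per page based on the memory attributes. */
    static int set_user_memory_region2(int vm_fd, uint32_t slot, uint64_t gpa,
                                       uint64_t size, void *hva,
                                       int guest_memfd, uint64_t gmem_offset)
    {
        struct kvm_userspace_memory_region2 region = {
            .slot = slot,
            .flags = KVM_MEM_GUEST_MEMFD,
            .guest_phys_addr = gpa,
            .memory_size = size,
            .userspace_addr = (uintptr_t)hva,
            .guest_memfd = guest_memfd,
            .guest_memfd_offset = gmem_offset,
        };

        return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION2, &region);
    }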
Xiaoyao Li
902dc0c4a7 HostMem: Add mechanism to opt in kvm guest memfd via MachineState
Add a new member "require_guest_memfd" to memory backends. When it's set
to true, it enables RAM_GUEST_MEMFD in ram_flags, thus private kvm
guest_memfd will be allocated during RAMBlock allocation.

Memory backend's @require_guest_memfd is wired with @require_guest_memfd
field of MachineState. MachineState::require_guest_memfd is supposed to
be set by any VMs that requires KVM guest memfd as private memory, e.g.,
TDX VM.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:09 -05:00
Xiaoyao Li
7bd3bf6642 RAMBlock/guest_memfd: Enable KVM_GUEST_MEMFD_ALLOW_HUGEPAGE
KVM allows KVM_GUEST_MEMFD_ALLOW_HUGEPAGE for guest memfd. When the
flag is set, KVM tries to allocate memory with transparent hugepages
first and falls back to non-hugepage on failure (a sketch follows this
entry).

However, KVM imposes one restriction: the size must be hugepage-size
aligned when KVM_GUEST_MEMFD_ALLOW_HUGEPAGE is set.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
---
v3:
 - New one in v3.
2023-11-15 00:57:09 -05:00
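
A sketch of creating a guest memfd under that size restriction. Note that KVM_GUEST_MEMFD_ALLOW_HUGEPAGE comes from the in-flight gmem series this branch pulls in and is not part of the final upstream ABI, hence the fallback define:

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* From the in-flight gmem series tracked by this branch; not upstream. */
    #ifndef KVM_GUEST_MEMFD_ALLOW_HUGEPAGE
    #define KVM_GUEST_MEMFD_ALLOW_HUGEPAGE (1ULL << 0)
    #endif

    #define HUGEPAGE_SIZE (2ULL * 1024 * 1024)

    static int create_guest_memfd(int vm_fd, uint64_t size)
    {
        struct kvm_create_guest_memfd gmem = { .size = size };

        /* The size must be hugepage aligned when the flag is set, so only
         * request transparent hugepages for aligned sizes. */
        if (!(size % HUGEPAGE_SIZE)) {
            gmem.flags |= KVM_GUEST_MEMFD_ALLOW_HUGEPAGE;
        }
        return ioctl(vm_fd, KVM_CREATE_GUEST_MEMFD, &gmem);
    }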
Xiaoyao Li
843bbbd03b RAMBlock: Add support of KVM private guest memfd
Add KVM guest_memfd support to RAMBlock so that both normal hva-based
memory and kvm guest-memfd-based private memory can be associated with
one RAMBlock.

Introduce a new flag, RAM_GUEST_MEMFD. When it's set, a KVM ioctl is
called to create the private guest_memfd during RAMBlock setup.

Note, RAM_GUEST_MEMFD is supposed to be set for the memory backends of
confidential guests, such as TDX VMs. How and when to set it for memory
backends will be implemented in the following patches.

Introduce memory_region_has_guest_memfd() to query whether a MemoryRegion
has KVM guest_memfd allocated.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
---
Changes in v3:
- rename gmem to guest_memfd;
- close(guest_memfd) when RAMBlock is released; (Daniel P. Berrangé)
- Squash the patch that introduces memory_region_has_guest_memfd().
2023-11-15 00:57:09 -05:00
Xiaoyao Li
3091ad45a5 *** HACK *** linux-headers: Update headers to pull in gmem APIs
This patch needs to be regenerated by the script

	scripts/update-linux-headers.sh

once gmem fd support is upstreamed in the Linux kernel.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:09 -05:00
Xiaoyao Li
8d635c6681 trace/kvm: Split address space and slot id in trace_kvm_set_user_memory()
The upper 16 bits of kvm_userspace_memory_region::slot are the address
space id. Parse it separately in trace_kvm_set_user_memory() (a standalone
sketch of the decoding follows this entry).

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:09 -05:00
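
A standalone sketch of the decoding the trace point performs, per the commit message:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t slot = (1u << 16) | 42;     /* address space 1, slot 42 */
        uint16_t as_id = slot >> 16;         /* upper 16 bits            */
        uint16_t slot_id = slot & 0xffff;    /* lower 16 bits            */

        printf("as_id=%u slot_id=%u\n", as_id, slot_id);
        return 0;
    }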
Xiaoyao Li
f5fd218755 i386/cpuid: Remove subleaf constraint on CPUID leaf 1F
There is no such constraint that the subleaf index needs to be less than 64.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:09 -05:00
Xiaoyao Li
d5591ff440 i386/cpuid: Decrease cpuid_i when skipping CPUID leaf 1F
Decrease the array index cpuid_i when CPUID leaf 1F is skipped, otherwise
we get an all-zeroed CPUID entry with leaf 0 and subleaf 0, which
conflicts with the correct leaf 0.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2023-11-15 00:57:09 -05:00
Xiaoyao Li
605d572f9c i386/pc: Drop pc_machine_kvm_type()
pc_machine_kvm_type() was introduced by commit e21be724ea ("i386/xen:
add pc_machine_kvm_type to initialize XEN_EMULATE mode") to do
Xen-specific initialization by utilizing the kvm_type method.

Commit eeedfe6c63 ("hw/xen: Simplify emulated Xen platform init")
moved the Xen-specific initialization to pc_basic_device_init().

There is no need to keep the PC-specific kvm_type() implementation
anymore. On the other hand, a later patch will implement the kvm_type()
method for all x86/i386 machines to support KVM_X86_SW_PROTECTED_VM.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Isaku Yamahata <isaku.yamahata@intel.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
2023-11-15 00:57:09 -05:00
Stefan Hajnoczi
9c673a41ee Update version for v8.2.0-rc0 release
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2023-11-14 12:35:47 -05:00
Stefan Hajnoczi
6d44474b3b Merge tag 'pull-request-2023-11-14' of https://gitlab.com/thuth/qemu into staging
* Fix s390x PV dumps in case of errors

# -----BEGIN PGP SIGNATURE-----
#
# iQJFBAABCAAvFiEEJ7iIR+7gJQEY8+q5LtnXdP5wLbUFAmVTXR4RHHRodXRoQHJl
# ZGhhdC5jb20ACgkQLtnXdP5wLbUzBg//ZDrzcInE59jo6zuEJiDYdqkauxiJWqdm
# PF3AaemZdww/SZ94960BLCPLm/53L4qeNHl9F4HMoCCqfqp6gUVouc0Rh5kd8/Bn
# 0+ND4Ni20LgKrr/10M8frVreujYhWEtILWA3Ef3HkMWGt45RB8mMwpYwmIZh6DHv
# B45xZaiOWzXNtroGSEBO52MuWzAlbBi68iVCS8xJ/q5xOe0s6julS4EwGo8P6R0c
# VZKlGM8KVndPPiRmG4NSyqpg91fp2p0Zo4Ol6GMSMsljvLB4aSIu0lDMR2FjreIv
# Fjmz78CZbNmgh/7edH1+vj+P083kEGwD7j1WHq4gbFONFdP8Gp0NQjhj/Zl4HsQh
# aCwVMuSdQmg7KEvn1wXc29kL9rBsG/5t5mSPkAzvM/kDahchtltpRxFYgcTGLhNs
# lT4cBjXSmyL2bCc1lX4sEw3/0RZE2GTRtuvP3caJWMZAAxYuE18LstWalPV5ttqe
# p7Xg/XRjOYlM2FGIMI9L5KR4mNKzWduvxnU/3o7qHUOEtWe9mICzCwC8UilLYbjd
# sGRJ5KRYN2nIzqTm0K50rrXPop9zVUHRSl37/9bV9+z6mFAh6Tg4+gIdQPayTo0S
# omRpMUMxmKkKSk1lTFWRr59sxTI+S5ANbRLeApxJsxXGCvoOzAn4nE7fxEpmTR2e
# ocddl9Wg4+w=
# =sFZX
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 14 Nov 2023 06:42:22 EST
# gpg:                using RSA key 27B88847EEE0250118F3EAB92ED9D774FE702DB5
# gpg:                issuer "thuth@redhat.com"
# gpg: Good signature from "Thomas Huth <th.huth@gmx.de>" [full]
# gpg:                 aka "Thomas Huth <thuth@redhat.com>" [full]
# gpg:                 aka "Thomas Huth <huth@tuxfamily.org>" [full]
# gpg:                 aka "Thomas Huth <th.huth@posteo.de>" [unknown]
# Primary key fingerprint: 27B8 8847 EEE0 2501 18F3  EAB9 2ED9 D774 FE70 2DB5

* tag 'pull-request-2023-11-14' of https://gitlab.com/thuth/qemu:
  target/s390x/arch_dump: Add arch cleanup function for PV dumps
  dump: Add arch cleanup function
  target/s390x/dump: Remove unneeded dump info function pointer init

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2023-11-14 10:50:22 -05:00
Stefan Hajnoczi
52105c6458 Merge tag 'net-pull-request' of https://github.com/jasowang/qemu into staging
# -----BEGIN PGP SIGNATURE-----
# Version: GnuPG v1
#
# iQEcBAABAgAGBQJlUt3jAAoJEO8Ells5jWIRX30H/iATyz+77w3Zd2rVfOpyHLhM
# lgvhTwVCltsWdZSZLu6zrLYh419NNcAOyb9/Ci7hKR+x4OmWbP6pme772LRH2Mhz
# zWzVoMXJeW1unjGvBcA8eAIsu3PUKoHLQ1J2dNwHheupMb2LkrWMaEMj10605aZ9
# WnjCFRIiejq4s2JGhofDTa0GCHcFmq2/Nzghb6MMzdPa99QTFnPmYRdIg2bGWd4L
# PmoueuiA/zoDZjx+Y1nC2IzXRq7SvFIAyz91J/zaUtZLD+7QKV/bP+JACTnyzhOY
# coUZnVzFc7q0Gv9wjw2oTNQo5CgKDyw7aDUB8oWsQLR1UvqEICbMhhz29YCWhok=
# =10qX
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 13 Nov 2023 21:39:31 EST
# gpg:                using RSA key EF04965B398D6211
# gpg: Good signature from "Jason Wang (Jason Wang on RedHat) <jasowang@redhat.com>" [full]
# Primary key fingerprint: 215D 46F4 8246 689E C77F  3562 EF04 965B 398D 6211

* tag 'net-pull-request' of https://github.com/jasowang/qemu:
  igb: Add Function Level Reset to PF and VF
  igb: Add a VF reset handler

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2023-11-14 10:50:00 -05:00
Stefan Hajnoczi
9f7c4f60cc Merge tag 'misc-fixes-20231113' of https://github.com/philmd/qemu into staging
Misc fixes for 8.2

- Missing Kconfig dependency in hw/mips (Marc-André)
- Typo in VMWare model (Alexandra)
- New avocado test for x86 processors addressing (Ani)
- Fix SyntaxWarnings in avocado (Thomas)
- Update virtio-fs mailing list address in MAINTAINERS (Stefan)

# -----BEGIN PGP SIGNATURE-----
#
# iQIzBAABCAAdFiEE+qvnXhKRciHc/Wuy4+MsLN6twN4FAmVSaAwACgkQ4+MsLN6t
# wN7L9RAAvlX5MKJe6d3+gYV8NciEYFQGWzgSjtc0IzRenYf1wvxXubFnmeTJ3kfF
# VOugAmeQUx5xvD0iPNrYCFn5fb1ZIFDuES25TsUSN38GCfo1/1+pG+wBWPJRwnQg
# LhRHLSzwJOiUSzOEWrVW/4f2TUM1svUM7WKAWB1AbO2Dd3BKzb25/AEdgYWCeQKV
# xrBFUH/owNCnWHxIFfLEO8Gt2WGkCLgblvLpeu5Mzds/5JcAi1Fb9lgpvvYxB1Jk
# Jgb2ic4Lp6+aTxYxS/+EbQcZciM1M7XXVN57xsQZEcave5CQ9fN8dMbTy3GJxEfJ
# OqWzLbwxybQCDA2f2Kd0HEv+U828ZD6/8HDfFk1JmZjE8UJ9vsvdnhQDDJOn5VJu
# INyupPsgaG86jaQavzAQJ7Cehe4SdEX0B11mdPfujooy4FvO79O8AzlMM2YG8ARn
# YydUzc1RnzE9l67RuEPvqiw1htpjJwV32v7nNZzsoBpqHG5OXAAD732OYnu/30a0
# U73tApRwPDUX3kiS8kUvXLq4/2NxFbX5VDx1bcJPTsweGplr59tPzMRpRxKQGhwa
# WEsjjqnDNEjYVzTMVfFRQlvDUkcr5Rrgd51Q3MDbm6inCf2JofT4m7UmvFVmOW3p
# w7IAbiguwyxC+Us9AkPTh03FYlzT2B+3XhKbAxcYJZ5/oH1O3x8=
# =SFhl
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 13 Nov 2023 13:16:44 EST
# gpg:                using RSA key FAABE75E12917221DCFD6BB2E3E32C2CDEADC0DE
# gpg: Good signature from "Philippe Mathieu-Daudé (F4BUG) <f4bug@amsat.org>" [full]
# Primary key fingerprint: FAAB E75E 1291 7221 DCFD  6BB2 E3E3 2C2C DEAD C0DE

* tag 'misc-fixes-20231113' of https://github.com/philmd/qemu:
  MAINTAINERS: update virtio-fs mailing list address
  tests/avocado/machine_s390_ccw_virtio.py: Fix SyntaxWarnings from python 3.12
  tests/avocado: add test to exercise processor address space memory bound checks
  hw/display/vmware_vga: fix probable typo
  hw/mips: LOONGSON3V depends on UNIMP device

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2023-11-14 10:49:42 -05:00
Stefan Hajnoczi
3b06e4058d Merge tag 'pull-target-arm-20231113' of https://git.linaro.org/people/pmaydell/qemu-arm into staging
target-arm queue:
 * hw/arm/virt: fix GIC maintenance IRQ registration
 * target/arm: HVC at EL3 should go to EL3, not EL2
 * target/arm: Correct MTE tag checking for reverse-copy MOPS
 * target/arm/tcg: enable PMU feature for Cortex-A8 and A9

# -----BEGIN PGP SIGNATURE-----
#
# iQJNBAABCAA3FiEE4aXFk81BneKOgxXPPCUl7RQ2DN4FAmVSYL0ZHHBldGVyLm1h
# eWRlbGxAbGluYXJvLm9yZwAKCRA8JSXtFDYM3iLND/99dZKHgNJx1k7aeGX8t4lU
# MTU0AsFndpx/WjWbviyfrO17B0FIi6kwhggDk2cXrXF26eBFcx5ruJ6sw9R1ZvsV
# y6Z1rhjd+skj3PMxGMU/I0XeR3TXJNo2eLAeKyPy4W75+5I0zT4PMEPJ05WylVTs
# RXuAhlyCXX9uTT2ILtGRiThpRrgnzGE3DU2Ry32s0+qjYq5U89J0+0kYPg6VFg29
# Lfj4zCwVu3/xX7Me+b84bTDxlQD4LSGdibscd0aCiMyamzfLl/naoDLvFIia/Q7h
# 4epcw3Bu+3nTicg70i9k6iNP4nDXPO9V2dbopJVd9wcgPBXicyoDrLA8CQdp+04v
# /vHT9+IZ4pFUcUp1+A9s6CcSMDeYOSPrQsd96HwaTtw/RjpxhLKC6EEpswpr5d4q
# SBU5I6lUe47HuwLxPpqucwNk/o4/9PZKBDSI1SUKoLPVyOvSS0sxJlTdQCyHCgmU
# ogjnFnw9J16X/GOWzS3tUD+9GS8s7WqJHyFl0t5ngvvamFTdquPFSFXQfZMTwAU1
# vVSam4oi51ON2sVjkR7Pn7BrTBE1QnsudB8Sc9If/LGhFSuNUKlj13+pWrGMty+n
# q9fFS5MuNlvVehX3mr+i4PA6WWYCZ0wHzTvXtYxKkyu1CZi53r9H1pZPwb6URjUt
# ceyJngaQH5dgtkVgCNSoRQ==
# =4D8I
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 13 Nov 2023 12:45:33 EST
# gpg:                using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE
# gpg:                issuer "peter.maydell@linaro.org"
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [full]
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>" [full]
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [full]
# gpg:                 aka "Peter Maydell <peter@archaic.org.uk>" [unknown]
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83  15CF 3C25 25ED 1436 0CDE

* tag 'pull-target-arm-20231113' of https://git.linaro.org/people/pmaydell/qemu-arm:
  target/arm/tcg: enable PMU feature for Cortex-A8 and A9
  target/arm: Correct MTE tag checking for reverse-copy MOPS
  target/arm: HVC at EL3 should go to EL3, not EL2
  hw/arm/virt: fix GIC maintenance IRQ registration

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2023-11-14 10:49:02 -05:00
Stefan Hajnoczi
315088134f Merge tag 'pull-pa-20231113' of https://gitlab.com/rth7680/qemu into staging
target/hppa: Mask reserved PSW bits in expand_sm_imm
target/hppa: Fix calculation of CR_IIASQ back register
target/hppa: Fix possible overflow in TLB size calculation
target/hppa: Fix probe instruction
target/hppa: Split MMU_PHYS_IDX to MMU_ABS_IDX, MMU_ABS_W_IDX
target/hppa: Reduce TARGET_PHYS_ADDR_SPACE_BITS to 40
hw/pci-host/astro: Translate 32-bit pci onto 40-bit runway bus
hw/hppa: Update SeaBIOS-hppa to version 12

# -----BEGIN PGP SIGNATURE-----
#
# iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmVSXR4dHHJpY2hhcmQu
# aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV87qwf+MkEuvMiwqx9YB2qa
# Yhn4m4H1DrQcqGJ2egGuiYrS45JCAUZUcXnmBxL//w3AW7hoeoZwmuFSj+I3EOhI
# y6ykMjMAe8d0VpWEvdkRh7SAWPBKvCJiAclkNyZkYhhagXryiFxqo9tL6nNQQEyz
# HaYzrDwqL+Qgh7/ahkA9XdVLdeTsMtXoLm1cCXpY+TL0MiQonBa1mc17vbyWN8hs
# qWQFBtik0lBIuEN0cB0bUgvV1oH9B8KVUYKbx/RhQORQAiU/O2SaSZ0fxU+F8ynB
# xIyQH6aik0pzgwSo25T/AMxxgUoDydvLDyLCu/R85eNmdgvOj+n4XGIiNEJKEltT
# 1OwGSQ==
# =Qcsh
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 13 Nov 2023 12:30:06 EST
# gpg:                using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg:                issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A  05C0 64DF 38E8 AF7E 215F

* tag 'pull-pa-20231113' of https://gitlab.com/rth7680/qemu:
  hw/hppa: Require at least SeaBIOS-hppa version 12
  target/hppa: Update to SeaBIOS-hppa from version 10 to 12
  hw/hppa: Move software power button address to page zero
  hw/pci-host/astro: Fix boot for C3700 machine
  target/hppa: Reduce TARGET_PHYS_ADDR_SPACE_BITS to 40
  target/hppa: Replace MMU_PHYS_IDX with MMU_ABS_IDX, MMU_ABS_W_IDX
  target/hppa: Introduce MMU_IDX_MMU_DISABLED
  target/hppa: Fix possible overflow in TLB size calculation
  target/hppa: Fix calculation of CR_IIASQ back register
  target/hppa: Use PRIV_P_TO_MMU_IDX in helper_probe
  target/hppa: Use only low 2 immediate bits for PROBEI
  target/hppa: Mask reserved PSW bits in expand_sm_imm

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2023-11-14 10:48:26 -05:00
Janosch Frank
d12a91e0ba target/s390x/arch_dump: Add arch cleanup function for PV dumps
PV dumps block vcpu runs until dump end is reached. If there's an
error between PV dump init and PV dump end, the VM will never be able
to run again. One example of such an error is insufficient disk space
for the dump file.

Let's add a cleanup function that tries to do a dump end. The dump
completion data is discarded but there's no point in writing it to a
file anyway if there's a possibility that other PV dump data is
missing.

Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-ID: <20231109120443.185979-4-frankja@linux.ibm.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
2023-11-14 10:42:32 +01:00
Janosch Frank
e72629e514 dump: Add arch cleanup function
Some architectures (s390x) need to cleanup after a failed dump to be
able to continue to run the vm. Add a cleanup function pointer and
call it if it's set.

Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-ID: <20231109120443.185979-3-frankja@linux.ibm.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
2023-11-14 10:42:32 +01:00
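
The shape of the change is an optional function pointer that the generic dump code invokes when it bails out. A minimal sketch of that pattern, using a simplified stand-in for QEMU's actual dump state (names here are illustrative):

    /* Sketch only: simplified stand-in for QEMU's dump state. */
    typedef struct DumpState DumpState;

    struct DumpState {
        /* ... other dump bookkeeping ... */
        void (*arch_cleanup_fn)(DumpState *s); /* NULL if the arch needs no cleanup */
    };

    static void dump_cleanup(DumpState *s)
    {
        if (s->arch_cleanup_fn) {      /* call the hook only if it's set */
            s->arch_cleanup_fn(s);     /* e.g. s390x issues a PV "dump end" here */
        }
    }
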
Janosch Frank
816644b121 target/s390x/dump: Remove unneeded dump info function pointer init
dump_state_prepare() now sets the function pointers to NULL so we only
need to touch them if we're going to use them.

Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Message-ID: <20231109120443.185979-2-frankja@linux.ibm.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
2023-11-14 10:42:32 +01:00
Stefan Hajnoczi
04c0a003dd MAINTAINERS: update virtio-fs mailing list address
The old virtio-fs mailing list address is no longer in use. Switch to
the new mailing list address.

Cc: Philippe Mathieu-Daudé <philmd@linaro.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: German Maglione <gmaglione@redhat.com>
Cc: Hanna Czenczek <hreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: German Maglione <gmaglione@redhat.com>
Message-ID: <20231111004920.148348-1-stefanha@redhat.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2023-11-13 19:06:06 +01:00
Thomas Huth
0b2af475e9 tests/avocado/machine_s390_ccw_virtio.py: Fix SyntaxWarnings from python 3.12
Python 3.12 now warns about backslashes in strings that aren't used
for escaping a special character from Python. Silence the warning
by using raw strings here instead.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-ID: <20231113140721.46903-1-thuth@redhat.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2023-11-13 19:06:06 +01:00
Helge Deller
f88131d931 hw/hppa: Require at least SeaBIOS-hppa version 12
The new SeaBIOS-hppa version 12 includes the necessary fixes to
support emulated PA2.0 CPUs and allows starting 64-bit Linux
kernels in the guest.
To boot a 64-bit machine, use the "-machine C3700" QEMU option.

Signed-off-by: Helge Deller <deller@gmx.de>
Acked-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2023-11-13 09:21:32 -08:00
Helge Deller
ec6f3fc3ef target/hppa: Update to SeaBIOS-hppa from version 10 to 12
SEABIOS_HPPA_VERSION 12 contains the following fixes and enhancements:
    - Reduce debug level
    - Update README file for PA-RISC
    - Fix debug name of CPU_HPA_xx if xx >= 10
    - Disable device indexing

SEABIOS_HPPA_VERSION 11 contains the following fixes and enhancements
(mostly to enable support for 64-bit Linux kernels):
    - Fixed 64-bit CPU detection via "mfctl,w" instruction
    - Implement PDC_PSW for 64-bit CPUs
    - Added PAT PDC functions:
        - PDC_PAT_CELL
        - PDC_PAT_CHASSIS_LOG
        - PDC_PAT_PD_GET_ADDR_MAP
        - PDC_PAT_CPU
    - Fix return value of PDC_CACHE_RET_SPID space-id bits
    - Introduce new default software IDs for the machines
    - Fix CPU and FPU model numbers
    - Fix 64-bit SMP rendezvous
    - Fix Linux 64-bit kernel crash in STI due to usage of unsigned
      32-bit "next_font" pointer in sti header files
    - Fix graphics output to LASI artist card on PA2.0 machines
    - More USB OHCI endianness fixes
    - Fixes which make ODE run on B160L
    - Fixes which make ODE detect Astro Runway port and CPUs
    - Implement "firmware unlocking" via PDC_MODEL/PDC_MODEL_CAPABILITIES call
    - Add subfunction 2 for PDC_MODEL_VERSIONS

Signed-off-by: Helge Deller <deller@gmx.de>
Acked-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2023-11-13 09:21:32 -08:00
Helge Deller
e274d2a777 hw/hppa: Move software power button address to page zero
Something appears to be off between the 64-bit CPU, the 32-bit PDC
(SeaBIOS-hppa firmware), and the 64-bit kernel when addressing the
power button in high-mapped firmware memory.

Use a 32-bit value at PAGE0->pad0[4] instead.

Signed-off-by: Helge Deller <deller@gmx.de>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2023-11-13 09:21:32 -08:00
Helge Deller
8066102df1 hw/pci-host/astro: Fix boot for C3700 machine
Apply the "32-bit PCI addressing on 40-bit Runway" as the default
iommu transformation.  This allows PCI devices to dma PDC memory.

Signed-off-by: Helge Deller <deller@gmx.de>
Acked-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2023-11-13 09:21:32 -08:00
Richard Henderson
fa71b4f84f target/hppa: Reduce TARGET_PHYS_ADDR_SPACE_BITS to 40
This is the value that is supported by both PA-8500 and Astro.
If we support a larger address space than expected, we trip up
software that did not fill in all of the page table bits,
expecting them to be ignored.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2023-11-13 09:21:32 -08:00
Richard Henderson
451d993d58 target/hppa: Replace MMU_PHYS_IDX with MMU_ABS_IDX, MMU_ABS_W_IDX
Align the language with pa2.0, separating absolute and physical.
The translation from absolute to physical depends on PSW.W, and
we prefer not to flush between changes, therefore use 2 mmu_idx.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2023-11-13 09:20:43 -08:00
Richard Henderson
17fe594c59 target/hppa: Introduce MMU_IDX_MMU_DISABLED
Reduce the number of direct checks against MMU_PHYS_IDX.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2023-11-13 09:17:21 -08:00
Helge Deller
2a23f0f118 target/hppa: Fix possible overflow in TLB size calculation
Coverity found that the shift of TARGET_PAGE_SIZE (32-bit type) might
overflow.  Fix it by casting TARGET_PAGE_SIZE to a 64-bit type before
doing the shift (CID 1523902 and CID 1523908).

Reported-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Helge Deller <deller@gmx.de>
Message-Id: <ZU6F/H8CZr3q4pP/@p100>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2023-11-13 09:17:07 -08:00
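
The underlying pattern: a 32-bit constant shifted left overflows before the result is widened to 64 bits. A minimal sketch of the bug and the fix, assuming TARGET_PAGE_SIZE is a plain int constant (4096 here is an assumed value, for illustration only):

    #include <stdint.h>

    #define TARGET_PAGE_SIZE 4096  /* assumed value, for illustration only */

    uint64_t tlb_size_buggy(unsigned shift)
    {
        return TARGET_PAGE_SIZE << shift;           /* 32-bit shift: can overflow */
    }

    uint64_t tlb_size_fixed(unsigned shift)
    {
        return (uint64_t)TARGET_PAGE_SIZE << shift; /* widen first, then shift */
    }
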
Helge Deller
e722e5a112 target/hppa: Fix calculation of CR_IIASQ back register
We need to use iasq_b and iaoq_b to determine the back register of CR_IIASQ.
This fixes random faults when booting up Linux user space.

Signed-off-by: Helge Deller <deller@gmx.de>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2023-11-13 09:17:07 -08:00
Richard Henderson
576fc9376d target/hppa: Use PRIV_P_TO_MMU_IDX in helper_probe
Direct privilege level to mmu_idx mapping has been
false for some time.  Provide the correct value to
hppa_get_physical_address.

Fixes: fa824d99f9 ("target/hppa: Switch to use MMU indices 11-15")
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2023-11-13 09:17:07 -08:00
Richard Henderson
e5d487c972 target/hppa: Use only low 2 immediate bits for PROBEI
During the conversion to decodetree, the 2-bit mask was lost.

Fixes: deee69a19f ("target/hppa: Convert memory management insns")
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2023-11-13 09:16:36 -08:00
Nikita Ostrenkov
f6e8d1ef05 target/arm/tcg: enable PMU feature for Cortex-A8 and A9
According to the technical reference manual, the Cortex-A9
has a Performance Monitoring Unit (PMU):
https://developer.arm.com/documentation/100511/0401/performance-monitoring-unit/about-the-performance-monitoring-unit
The Cortex-A8 does also.

We already define the PMU registers when emulating the
Cortex-A8 and Cortex-A9, because we put them in v7_cp_reginfo[]
rather than guarding them behind ARM_FEATURE_PMU.  So the only thing
that setting the feature bit changes is that the registers actually
do something.

Enable ARM_FEATURE_PMU for Cortex-A8 and Cortex-A9, to avoid
this anomaly.

(The A8 and A9 PMU predates the standardisation of ID_DFR0.PerfMon,
so the field there is 0, but the PMU is still present.)

Signed-off-by: Nikita Ostrenkov <n.ostrenkov@gmail.com>
Message-id: 20231112165658.2335-1-n.ostrenkov@gmail.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
[PMM: tweaked commit message; also enable PMU for A8]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2023-11-13 16:31:41 +00:00
Ani Sinha
0034d0395e tests/avocado: add test to exercise processor address space memory bound checks
QEMU has validations to make sure that a VM is not started with more memory
(static and hotpluggable memory) than what the guest processor can address
directly with its addressing bits. This change adds a test to make sure QEMU
fails to start with a specific error message when an attempt is made to
start a VM with more memory than what the processor can directly address.
The test also checks for passing cases when the address space of the processor
is capable of addressing all memory. Boundary cases are tested.

CC: imammedo@redhat.com
CC: David Hildenbrand <david@redhat.com>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Ani Sinha <anisinha@redhat.com>
Message-ID: <20231109045601.33349-1-anisinha@redhat.com>
Message-ID: <D5D8D419-76BA-4FB0-9BAC-4F7470A052FC@redhat.com>
[PMD: Use SPDX tag]
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2023-11-13 16:56:24 +01:00
Alexandra Diupina
4c7ae73caf hw/display/vmware_vga: fix probable typo
When calling trace_vmware_verify_rect_greater_than_bound(), replace
the string "y" with "h" and the variable y with h.

Found by Linux Verification Center (linuxtesting.org) with SVACE.

Fixes: 02218aedb1 ("hw/display/vmware_vga: replace fprintf calls with trace events")
Signed-off-by: Alexandra Diupina <adiupina@astralinux.ru>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Message-ID: <20231110174104.13280-1-adiupina@astralinux.ru>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2023-11-13 16:56:24 +01:00
Marc-André Lureau
52c773ce89 hw/mips: LOONGSON3V depends on UNIMP device
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Cc: qemu-stable@nongnu.org
Fixes: c76b409fef ("hw/mips: Add Loongson-3 machine support")
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-ID: <20231107140615.3034763-1-marcandre.lureau@redhat.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2023-11-13 16:56:06 +01:00
Peter Maydell
4d044472ab target/arm: Correct MTE tag checking for reverse-copy MOPS
When we are doing a FEAT_MOPS copy that must be performed backwards,
we call mte_mops_probe_rev(), passing it the address of the last byte
in the region we are probing.  However, allocation_tag_mem_probe()
wants the address of the first byte to get the tag memory for.
Because we passed it (ptr, size) we could incorrectly trip the
allocation_tag_mem_probe() check for "does this access run across to
the following page", and if that following page happened not to be
valid then we would assert.

We know we will always be only dealing with a single page because the
code that calls mte_mops_probe_rev() ensures that.  We could make
mte_mops_probe_rev() pass 'ptr - (size - 1)' to
allocation_tag_mem_probe(), but then we would have to adjust the
returned 'mem' pointer to get back to the tag RAM for the last byte
of the region.  It's simpler to just pass in a size of 1 byte,
because we know that allocation_tag_mem_probe() in pure-probe
single-page mode doesn't care about the size.

Fixes: 69c51dc372 ("target/arm: Implement MTE tag-checking functions for FEAT_MOPS copies")
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Acked-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20231110162546.2192512-1-peter.maydell@linaro.org
2023-11-13 13:15:50 +00:00
Peter Maydell
fc58891d04 target/arm: HVC at EL3 should go to EL3, not EL2
AArch64 permits code at EL3 to use the HVC instruction; however the
exception we take should go to EL3, not down to EL2 (see the pseudocode
AArch64.CallHypervisor()). Fix the target EL.

Cc: qemu-stable@nongnu.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Edgar E. Iglesias <edgar@zeroasic.com>
Message-id: 20231109151917.1925107-1-peter.maydell@linaro.org
2023-11-13 13:15:31 +00:00
Jean-Philippe Brucker
1d675e59ea hw/arm/virt: fix GIC maintenance IRQ registration
Since commit 9036e917f8 ("{include/}hw/arm: refactor virt PPI logic"),
GIC maintenance IRQ registration fails on arm64:

[    0.979743] kvm [1]: Cannot register interrupt 9

That commit re-defined VIRTUAL_PMU_IRQ to be an INTID but missed a case
where the maintenance IRQ is actually referred to by its PPI index. Just
like commit fa68ecb330 ("hw/arm/virt: fix PMU IRQ registration"), use
INTID_TO_PPI(). A search of "GIC_FDT_IRQ_TYPE_PPI" indicates that there
shouldn't be more similar issues.

Fixes: 9036e917f8 ("{include/}hw/arm: refactor virt PPI logic")
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Message-id: 20231110090557.3219206-2-jean-philippe@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2023-11-13 13:13:49 +00:00
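
For context: GIC private peripheral interrupts (PPIs) occupy INTIDs 16-31, so the conversion is a fixed offset of 16. A sketch of the macro's likely definition (the exact QEMU header isn't quoted in the log):

    /* PPI index = INTID - 16; e.g. the GIC maintenance interrupt,
     * INTID 25, is PPI 9 -- matching "Cannot register interrupt 9". */
    #define INTID_TO_PPI(irq) ((irq) - 16)
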
Stefan Hajnoczi
f78ea7ddb0 Merge tag 'pull-request-2023-11-13' of https://gitlab.com/thuth/qemu into staging
* Fix compilation with Clang 17 on s390x hosts
* Two small s390x PCI fixes
* Update MAINTAINERS file with more entries
* Fix NetBSD VM test
* Clean up some bad wordings

# -----BEGIN PGP SIGNATURE-----
#
# iQJFBAABCAAvFiEEJ7iIR+7gJQEY8+q5LtnXdP5wLbUFAmVSAoIRHHRodXRoQHJl
# ZGhhdC5jb20ACgkQLtnXdP5wLbVXBg//VVZS5CXEfOFV91I1kqQnLCvgwmuAyqEg
# PI2/HBxuhzeBx+F1t7uR0n15tUPi1zkFFBOpyBDBubvWcp4vGvFwLQoiBCUvNzBA
# +b1vMySP+K0OO1X5yT3cFHXF9q0o0V5WADwemf5RglIPjlTOIiz9qhD4EYqd2QHC
# EUd9Y45DP4Y0V5raHLjY990f/zr3PuSAB6MASFTUnKdgGkRqonLWdLDdIZNDrZuL
# oGwx1ALXgBOMV3yNyQx9jZBT24git/ai1vd9AU/d3JRKDPsd+4vC39+PTI9NH4h6
# oQglvo399f64cir1f1JJ3MN4ZtwXZpwUkjeTMcR9XZxk7GibU7P2arG5M3TERdmE
# VLqylYsnbJojWOeCH+TViJapRhg1CzUveVlQofr7GHvf2N3oy3BrKaV715gauEyW
# zpjbhSPpIQu9WFXt8+tSquqbvpAP/VlLrOV73D4LzJ7WdTa9CHmSek8D0zoRQDZR
# 8OixrgoBKS+pmBDmTve5gFsIKhZIz9CrmaAKKYdskC8blENxCng8LOFp7sg2PK3M
# U0lWYoDS7qZ85761Bl+QaBdFocdahQqkO/LUQuhoSt2OvA1EGAz2FdVSKkmPDdSS
# P/homr4hOXIqJFSsZj0YNUTIXsXwLBvKjvcJPAWYgbXZhim0LtPQTQO3+ignwGyu
# RXjaVkvkf/s=
# =+2rp
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 13 Nov 2023 06:03:30 EST
# gpg:                using RSA key 27B88847EEE0250118F3EAB92ED9D774FE702DB5
# gpg:                issuer "thuth@redhat.com"
# gpg: Good signature from "Thomas Huth <th.huth@gmx.de>" [full]
# gpg:                 aka "Thomas Huth <thuth@redhat.com>" [full]
# gpg:                 aka "Thomas Huth <huth@tuxfamily.org>" [full]
# gpg:                 aka "Thomas Huth <th.huth@posteo.de>" [unknown]
# Primary key fingerprint: 27B8 8847 EEE0 2501 18F3  EAB9 2ED9 D774 FE70 2DB5

* tag 'pull-request-2023-11-13' of https://gitlab.com/thuth/qemu:
  hw/audio/es1370: Clean up comment
  tests/tsan: Rename the file with the entries that should be ignored
  test-resv-mem: Fix CID 1523911
  tests/vm/netbsd: Use Python v3.11
  MAINTAINERS: Add a general architecture section for x86
  MAINTAINERS: Extend the Stellaris section
  MAINTAINERS: Add hw/display/sii9022.c to the Versatile Express section
  MAINTAINERS: Add hw/input/ads7846.c to the PXA2XX section
  MAINTAINERS: Add include/hw/input/pl050.h to the PrimeCell/CMSDK section
  s390x/pci: only limit DMA aperture if vfio DMA limit reported
  s390x/pci: bypass vfio DMA counting when using cdev
  host/include/generic/host/atomic128: Fix compilation problem with Clang 17

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2023-11-13 07:15:43 -05:00
Stefan Hajnoczi
05fa22770a Merge tag 'pull-qapi-2023-11-13' of https://repo.or.cz/qemu/armbru into staging
QAPI patches patches for 2023-11-13

# -----BEGIN PGP SIGNATURE-----
#
# iQJGBAABCAAwFiEENUvIs9frKmtoZ05fOHC0AOuRhlMFAmVR7j4SHGFybWJydUBy
# ZWRoYXQuY29tAAoJEDhwtADrkYZTmT0P/imN9c6xnkx8Kff0iEaT88N8YUHEu5gt
# ECiQtfpbU8y8itIrWpuXWk7HUC1Jp397vKgUhmmr5Im32vx1waUGz/zxdBiitw0a
# 2McXfD6Lbz3LN+q002Vq0QjP3tMeoc8DoP/CagmdXniGrxyJSs/oIsUAdqZnymug
# ZREYRUlbrgJ8XA06n9qIaCGpiihBLl2XBbx4SV9JWle/9vKpII9EBrWNXTcCkde3
# W6gHEKgGAKa1NGYMJtOncanpnzVP0qoLmb3jmiR9tpj3YBB/5LP9l4O11DTC+PQ5
# nn+OmtmjuKE2a2C79Y8VCbRPR6MPSDDbuWwu7Q+A7vXi/1gnkMeXH+A6jE0d3y+p
# PWKqBK65U+yS0IwiUGpMpcGo2WtZiBLzp3ragt+gzi3gV5GvEvPv0uvcEd66in95
# +M2RoVhkbi+e03mF+vDldvbwBYPwlNdgAX/ly7SR76YZybSMK0Jkp7yfvxN35LJc
# kgCo0CY9kAYYUgPUJMiPhaxSfRVk738Qex7QCvFwDiZE8l4XEwoT1PRO0fs8G5aI
# /Yz7hhjkaLw+i+k/tCVCVe+hOgMxxVUz/gZzV4+yrfQNxz41v4WqCfE8a+m6RROo
# TaceVJPKqsyoJ2zmS+CpEPFWkWXEQSQ14yu2wdF8wMyKEWDS4yhRG78rnYC2/ZhC
# zullCL7QfFbf
# =bGOD
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 13 Nov 2023 04:37:02 EST
# gpg:                using RSA key 354BC8B3D7EB2A6B68674E5F3870B400EB918653
# gpg:                issuer "armbru@redhat.com"
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>" [full]
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>" [full]
# Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867  4E5F 3870 B400 EB91 8653

* tag 'pull-qapi-2023-11-13' of https://repo.or.cz/qemu/armbru:
  tests/qapi-schema: Tidy up pylint warnings and advice
  sphinx/qapidoc: Tidy up pylint warning raise-missing-from
  qapi: Fix QAPISchemaEntity.__repr__()

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2023-11-13 07:15:30 -05:00
Stefan Hajnoczi
c86a59fd34 Merge tag 'pull-shadow-2023-11-13' of https://repo.or.cz/qemu/armbru into staging
-Wshadow=local patches for 2023-11-13

# -----BEGIN PGP SIGNATURE-----
#
# iQJGBAABCAAwFiEENUvIs9frKmtoZ05fOHC0AOuRhlMFAmVR7ZISHGFybWJydUBy
# ZWRoYXQuY29tAAoJEDhwtADrkYZT7YkP/RUaHzka7vJhcAJSFgCviZ7NYZFbDYkT
# MT9Hi4XYYkOqS0BQ9xZPZDy47oB4pqZlNfMCUD3vmD8LLdVsUyRIbZxu59SdgTOa
# dnNvXIsC16y7Hk6VRfWB4bifOa1rmbF2el5WevA1UpXMRnjkMbtahbXXlSudQpeQ
# Y0e5mILKcbLD1Q6RuTounkfZ/C5pJZKycxVrN8YvFBQ2pILR/JtVwCU9rQ525Sgx
# tgScQ+Z+S4Pw+hyD8kpnw+xqk/iXprgX7GmOhCiy4yLdndq1nZgXOXnZvuIvG3n2
# cvM+dNTVH32qlLMqNdwAaOjAVStobdt85oiyMTnkBfi1+6B+5I6FzyH31/XhV86t
# siCqZ1vsL8N8cuA9mhbKS2eaLrf4ubAyh07SycaKNWfxVMgIAN0rpqaUzKghGcT3
# y9TfFI48zKgCS8AmMuUYy9G6xPlQy726ZHi4yb+t+j2dDx80eAQyXZRPQBiEomND
# cIotxEAPdAdl+52lCwMpYPt+rnxGg4EW05de/CrjKag9Fizfz4cOuwCsSp32XuNe
# TmE1Gi3pKaX/goLO2oTesZ6h8rbMoGZ3XG7YWoIkDpjb+XKuUXXxg+b1cv/0dntk
# Qn6izIFdSscYXakxAkq6ci/3xS0ork3FXjDHihOKj9IWR8YrhqaDZ//XHnIL1Ld7
# QcXs7wElHeJM
# =66br
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 13 Nov 2023 04:34:10 EST
# gpg:                using RSA key 354BC8B3D7EB2A6B68674E5F3870B400EB918653
# gpg:                issuer "armbru@redhat.com"
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>" [full]
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>" [full]
# Primary key fingerprint: 354B C8B3 D7EB 2A6B 6867  4E5F 3870 B400 EB91 8653

* tag 'pull-shadow-2023-11-13' of https://repo.or.cz/qemu/armbru:
  meson: Enable -Wshadow=local
  block/snapshot: Fix compiler warning with -Wshadow=local

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2023-11-13 07:15:19 -05:00
Stefan Hajnoczi
616425d452 Merge tag 'mem-2023-11-13' of https://github.com/davidhildenbrand/qemu into staging
Hi,

"Host Memory Backends" and "Memory devices" queue ("mem"):
- One virtio-mem fix leading to a QEMU crash in QEMU debug builds

# -----BEGIN PGP SIGNATURE-----
#
# iQJFBAABCAAvFiEEG9nKrXNcTDpGDfzKTd4Q9wD/g1oFAmVR4DsRHGRhdmlkQHJl
# ZGhhdC5jb20ACgkQTd4Q9wD/g1qKMQ//fe/4mJOXQ8l5OZ3ScpC2K7yoB9dowJiQ
# vobja0X0UhyMIOEH4V5RDtMrW3WcYzD2rVwehpLel3QbwcGa7TTB8NtkTx/t4L8P
# tRQe3epGvz+0Kkx4kBFcNBYNR5Skl1rg9kcDhYxNmoOLngWjJcDqRBryfc3V9pEs
# dl9sWXaQn82MGNQGuWFnTOUeOgg1LIdKMRcU2AzhAhrA/e4BqOof/JW+PVdQfzDq
# 4Jhq74pDmKiuH9GmRZgbNlNFX+GxRk63jJrRw4HDAbSD5dBmVnLAjgFZ0sBcKxe0
# HyiGrZOZNIMhMl/GwwQ7NilN03Hl6Hqlx03nz96/2DbiEKr6sOAErIclkUOVlr7k
# YeJvFv+iijqyC4XF43OqoIOz8mtkxan8CuiZW/6/FV9mS/Rb3r8of/BnrK2a8/Kh
# RJLX3tsmrxFdFDxVXWPw+UYrJy8g0xQP2Ils3OReO8QO9qqCytPqJFQsSHDlK3T3
# 2K5FiDpMu7cjFezLyRF0LkPSWg1CV7D6Vc8mp+amc2K4Ltiyhp4xZ2TBKrEC8HHE
# zs+EyEIfsna4SaKwVUVRimWF3+B4GojoAcAD0zju+uhD8Zw+z553zXpr5TSx0Une
# cbMs1n5MTzE6pQo1MmL3hu1xaf6Xdx7hnJPlcnjlKXGFol8ghv6tBkHbOQA5B1/H
# 7hVX43f3epM=
# =7M1K
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 13 Nov 2023 03:37:15 EST
# gpg:                using RSA key 1BD9CAAD735C4C3A460DFCCA4DDE10F700FF835A
# gpg:                issuer "david@redhat.com"
# gpg: Good signature from "David Hildenbrand <david@redhat.com>" [unknown]
# gpg:                 aka "David Hildenbrand <davidhildenbrand@gmail.com>" [full]
# gpg:                 aka "David Hildenbrand <hildenbr@in.tum.de>" [unknown]
# gpg: WARNING: The key's User ID is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 1BD9 CAAD 735C 4C3A 460D  FCCA 4DDE 10F7 00FF 835A

* tag 'mem-2023-11-13' of https://github.com/davidhildenbrand/qemu:
  virtio-mem: fix division by zero in virtio_mem_activate_memslots_to_plug()

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2023-11-13 07:15:06 -05:00
Stefan Hajnoczi
c96c116e10 MAINTAINERS: update virtio-fs mailing list address
The old virtio-fs mailing list address is no longer in use. Switch to
the new mailing list address.

Cc: Philippe Mathieu-Daudé <philmd@linaro.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: German Maglione <gmaglione@redhat.com>
Cc: Hanna Czenczek <hreitz@redhat.com>
Reviewed-by: German Maglione <gmaglione@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-ID: <20231111004920.148348-1-stefanha@redhat.com>
2023-11-13 07:13:35 -05:00
Peter Maydell
4409a6d855 hw/audio/es1370: Clean up comment
Replace a sweary comment with one that's a bit more helpful to
future readers of the code.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Volker Rümelin <vr_qemu@t-online.de>
Message-ID: <20231110164318.2197569-1-peter.maydell@linaro.org>
Signed-off-by: Thomas Huth <thuth@redhat.com>
2023-11-13 11:35:47 +01:00
Thomas Huth
f9a19bd8d2 tests/tsan: Rename the file with the entries that should be ignored
Let's use a better file name here.

Message-ID: <20231109174720.375873-1-thuth@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Thomas Huth <thuth@redhat.com>
2023-11-13 11:35:47 +01:00
Eric Auger
2e990d81d9 test-resv-mem: Fix CID 1523911
Coverity complains about passing "&expected" to "run_range_inverse_array",
which dereferences null "expected". I guess the problem is that the
compare_ranges() loop dereferences 'e' without testing it. However the
loop condition is based on 'ranges' which is guaranteed to have
the same length as 'expected' given the g_assert_cmpint() just
before the loop. So the code looks safe to me.

Nevertheless, add a test on 'expected' before the loop to get rid of the
warning.

Fixes: CID 1523901
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Reported-by: Coverity (CID 1523901)
Message-ID: <20231110083654.277345-1-eric.auger@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
2023-11-13 11:35:47 +01:00
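
The fix amounts to a defensive NULL test ahead of a loop whose safety was only established indirectly. A minimal sketch with illustrative types and sizes, using plain assert() in place of glib's g_assert_cmpint():

    #include <assert.h>
    #include <stddef.h>

    typedef struct { unsigned long lob, upb; } Range; /* illustrative */

    static void compare_ranges(const Range *ranges, size_t n,
                               const Range *expected, size_t n_expected)
    {
        assert(n == n_expected);
        if (!expected) {        /* the added test: silences the analyzer */
            return;
        }
        for (size_t i = 0; i < n; i++) {
            assert(ranges[i].lob == expected[i].lob);
            assert(ranges[i].upb == expected[i].upb);
        }
    }
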
Philippe Mathieu-Daudé
00ac955b06 tests/vm/netbsd: Use Python v3.11
We require the 'ninja-build' package, which depends on 'python311':

  $ pkgin show-deps ninja-build
  direct dependencies for ninja-build-1.11.1nb1
          python311>=3.11.0

So we end up installing both Python v3.10 and v3.11:

  [31/76] installing python311-3.11.5...
  [54/76] installing python310-3.10.13...
  [74/76] installing py310-expat-3.10.13nb1...

Then the build system picks Python v3.11, and doesn't find
py-expat because we only installed the 3.10 version:

  python determined to be '/usr/pkg/bin/python3.11'
  python version: Python 3.11.5

  *** Ouch! ***

  Python's pyexpat module is not found.
  It's normally part of the Python standard library, maybe your distribution packages it separately?
  Either install pyexpat, or alleviate the need for it in the first place by installing pip and setuptools for '/usr/pkg/bin/python3.11'.

  (Hint: NetBSD's pkgsrc debundles this to e.g. 'py310-expat'.)

  ERROR: python venv creation failed

Fix by installing py-expat for v3.11. Remove the v3.10
packages since we aren't using them anymore.

Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Tested-by: Thomas Huth <thuth@redhat.com>
Message-ID: <20231109150900.91186-1-philmd@linaro.org>
Signed-off-by: Thomas Huth <thuth@redhat.com>
2023-11-13 11:35:47 +01:00
Thomas Huth
d229996b40 MAINTAINERS: Add a general architecture section for x86
It's a little bit weird that the files in target/i386/ which
are not in a subfolder there do not have any associated
maintainer (and thus nobody might be CC:-ed on changes to
these files). We should have a general x86 section for these
files, similar to what we already have for s390x and mips.
Since Paolo is already listed as maintainer for both the
x86 KVM and TCG CPUs, I'd like to suggest him as maintainer
for the general files, too.

Message-ID: <20230929134551.395438-1-thuth@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
2023-11-13 11:35:47 +01:00
Thomas Huth
7c7e1f6017 MAINTAINERS: Extend the Stellaris section
The header include/hw/timer/stellaris-gptm.h obviously belongs to the
Stellaris machines, so let's add it to the corresponding section.

And hw/display/ssd0303.c and hw/display/ssd0323.c are only used
by hw/arm/stellaris.c, so add them to the corresponding section
in the MAINTAINERS file, too.

Message-ID: <20231020060936.524988-5-thuth@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Thomas Huth <thuth@redhat.com>
2023-11-13 11:35:47 +01:00
Thomas Huth
42c31682ba MAINTAINERS: Add hw/display/sii9022.c to the Versatile Express section
This graphics adapter is only used by the Versatile Express machine,
so add it to the corresponding section in MAINTAINERS.

Message-ID: <20231020060936.524988-4-thuth@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Thomas Huth <thuth@redhat.com>
2023-11-13 11:35:47 +01:00
Thomas Huth
261c1281e8 MAINTAINERS: Add hw/input/ads7846.c to the PXA2XX section
The code from hw/input/ads7846.c is only used by hw/arm/spitz.c,
so add this file to the same section where hw/arm/spitz.c is
listed.

Message-ID: <20231020060936.524988-3-thuth@redhat.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Thomas Huth <thuth@redhat.com>
2023-11-13 11:35:47 +01:00
Thomas Huth
4940da2096 MAINTAINERS: Add include/hw/input/pl050.h to the PrimeCell/CMSDK section
The corresponding pl050.c file is already listed here, so we should
mention the header here, too.

Message-ID: <20231020060936.524988-2-thuth@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Thomas Huth <thuth@redhat.com>
2023-11-13 11:35:47 +01:00
Matthew Rosato
8011b508cf s390x/pci: only limit DMA aperture if vfio DMA limit reported
If the host kernel lacks vfio DMA limit reporting, do not attempt
to shrink the guest DMA aperture.

Fixes: df202e3ff3 ("s390x/pci: shrink DMA aperture to be bound by vfio DMA limit")
Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
Message-ID: <20231110175108.465851-3-mjrosato@linux.ibm.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
2023-11-13 11:35:47 +01:00
Matthew Rosato
0ab3565840 s390x/pci: bypass vfio DMA counting when using cdev
The current code assumes that there is always a vfio group, but
that's no longer guaranteed with the iommufd backend when using
cdev.  In this case, we don't need to track the vfio dma limit
anyway.

Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
Message-ID: <20231110175108.465851-2-mjrosato@linux.ibm.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
2023-11-13 11:35:47 +01:00
Thomas Huth
34aee9c946 host/include/generic/host/atomic128: Fix compilation problem with Clang 17
When compiling QEMU with Clang 17 on an s390x host, the compilation fails:

In file included from ../accel/tcg/cputlb.c:32:
In file included from /root/qemu/include/exec/helper-proto-common.h:10:
In file included from /root/qemu/include/qemu/atomic128.h:62:
/root/qemu/host/include/generic/host/atomic128-ldst.h:68:15: error:
 __sync builtin operation MUST have natural alignment (consider using
 __atomic). [-Werror,-Wsync-alignment]
   68 |     } while (!__sync_bool_compare_and_swap_16(ptr_align, old, new.i));
      |               ^
In file included from ../accel/tcg/cputlb.c:32:
In file included from /root/qemu/include/exec/helper-proto-common.h:10:
In file included from /root/qemu/include/qemu/atomic128.h:61:
/root/qemu/host/include/generic/host/atomic128-cas.h:36:11: error:
 __sync builtin operation MUST have natural alignment (consider using
 __atomic). [-Werror,-Wsync-alignment]
   36 |     r.i = __sync_val_compare_and_swap_16(ptr_align, c.i, n.i);
      |           ^
2 errors generated.

It's arguably a bug in Clang since we already use __builtin_assume_aligned()
to tell the compiler that the pointer is properly aligned. But according to
https://github.com/llvm/llvm-project/issues/69146 it seems like the Clang
folks don't see an easy fix on their side and recommend to use a type
declared with __attribute__((aligned(16))) to work around this problem.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1934
Message-ID: <20231108085954.313071-1-thuth@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Thomas Huth <thuth@redhat.com>
2023-11-13 11:35:47 +01:00
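
In sketch form, the recommended workaround is to give the operand type an explicit 16-byte alignment attribute so Clang accepts the 16-byte __sync builtin; the typedef and function names below are illustrative, not QEMU's (on x86-64 this also needs -mcx16):

    #include <stdbool.h>

    /* An __int128 variant carrying an explicit alignment attribute. */
    typedef __int128 int128_aligned __attribute__((aligned(16)));

    static inline bool cas16(int128_aligned *ptr, __int128 old, __int128 new)
    {
        return __sync_bool_compare_and_swap_16(ptr, old, new);
    }
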
Markus Armbruster
5c24c3e2f3 tests/qapi-schema: Tidy up pylint warnings and advice
Pylint warns:

    tests/qapi-schema/test-qapi.py:139:13: W1514: Using open without explicitly specifying an encoding (unspecified-encoding)
    tests/qapi-schema/test-qapi.py:143:13: W1514: Using open without explicitly specifying an encoding (unspecified-encoding)

Add encoding='utf-8'.

Pylint advises:

    tests/qapi-schema/test-qapi.py:143:13: R1732: Consider using 'with' for resource-allocating operations (consider-using-with)

Silence this by returning the value directly.

Pylint advises:

    tests/qapi-schema/test-qapi.py:221:4: R1722: Consider using sys.exit() (consider-using-sys-exit)
    tests/qapi-schema/test-qapi.py:226:4: R1722: Consider using sys.exit() (consider-using-sys-exit)

Sure, why not.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-ID: <20231025092925.1785934-1-armbru@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
2023-11-13 10:36:51 +01:00
Markus Armbruster
c375f05ef5 sphinx/qapidoc: Tidy up pylint warning raise-missing-from
Pylint advises:

    docs/sphinx/qapidoc.py:518:12: W0707: Consider explicitly re-raising using 'raise ExtensionError(str(err)) from err' (raise-missing-from)

From its manual:

    Python's exception chaining shows the traceback of the current
    exception, but also of the original exception.  When you raise a
    new exception after another exception was caught it's likely that
    the second exception is a friendly re-wrapping of the first
    exception.  In such cases `raise from` provides a better link
    between the two tracebacks in the final error.

Makes sense, so do it.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-ID: <20231025092159.1782638-2-armbru@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
2023-11-13 10:36:51 +01:00
Markus Armbruster
6d133eef98 qapi: Fix QAPISchemaEntity.__repr__()
I messed it up on merge.  It's a debugging aid, so no impact on build.

Fixes: e307a8174b (qapi: provide a friendly string representation of QAPI classes)
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-ID: <20231024104841.1569250-1-armbru@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2023-11-13 10:36:51 +01:00
Markus Armbruster
569205e4e9 meson: Enable -Wshadow=local
Local variables shadowing other local variables or parameters make the
code needlessly hard to understand.  Bugs love to hide in such code.
Evidence: commit bbde656263 (migration/rdma: Fix save_page method to
fail on polling error).

Enable -Wshadow=local to prevent such issues.  Possible thanks to
recent cleanups.  Enabling -Wshadow would prevent more issues, but
we're not yet ready for that.

As usual, the warning is only enabled when the compiler recognizes it.
GCC does, Clang doesn't.

Some shadowed locals remain in bsd-user.  Since BSD prefers Clang,
let's not wait for its cleanup.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-ID: <20231026053115.2066744-2-armbru@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Tested-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2023-11-13 10:32:57 +01:00
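
A minimal example of the bug class -Wshadow=local catches (constructed for illustration, not taken from the commit cited above):

    #include <stdio.h>

    static int do_work(void)
    {
        int ret = 0;
        for (int i = 0; i < 4; i++) {
            int ret = -1;  /* shadows the outer ret: -Wshadow=local warns */
            (void)ret;     /* any "result" stored here is silently lost */
        }
        return ret;        /* still 0, whatever happened in the loop */
    }

    int main(void)
    {
        printf("%d\n", do_work());  /* prints 0 */
        return 0;
    }
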
David Hildenbrand
364eff6885 virtio-mem: fix division by zero in virtio_mem_activate_memslots_to_plug()
When running with "dynamic-memslots=off", we enter
virtio_mem_activate_memslots_to_plug() only to return immediately
because "vmem->dynamic_memslots == false". However, the compiler might
not optimize out calculating start_idx+end_idx, where we divide by
vmem->memslot_size. In such a configuration, the memslot size is 0 and
we'll get a division by zero:

    (qemu) qom-set vmem0 requested-size 3G
    (qemu) q35.sh: line 38: 622940 Floating point exception(core dumped)

The same is true for virtio_mem_deactivate_unplugged_memslots(), however
we never really reach that code without a prior
virtio_mem_activate_memslots_to_plug() call.

Let's fix it by simply calling these functions only with
"dynamic-memslots=on".

This was found when using a debug build of QEMU.

Message-ID: <20231023111341.219317-1-david@redhat.com>
Reported-by: Mario Casquero <mcasquer@redhat.com>
Fixes: 177f9b1ee4 ("virtio-mem: Expose device memory dynamically via multiple memslots if enabled")
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Tested-by: Mario Casquero <mcasquer@redhat.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
2023-11-13 09:35:44 +01:00
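
The essence of the fix: guard before any arithmetic that divides by a field which is zero in this configuration. A simplified sketch with illustrative names, not the actual virtio-mem code:

    typedef struct {
        int dynamic_memslots;        /* false when "dynamic-memslots=off" */
        unsigned long memslot_size;  /* 0 in that configuration */
    } VirtIOMEMSketch;

    static void activate_memslots(VirtIOMEMSketch *vmem,
                                  unsigned long offset, unsigned long size)
    {
        if (!vmem->dynamic_memslots) {
            return;                  /* guard first: memslot_size may be 0 */
        }
        unsigned long start_idx = offset / vmem->memslot_size;
        unsigned long end_idx = (offset + size + vmem->memslot_size - 1)
                                / vmem->memslot_size;
        (void)start_idx;
        (void)end_idx;
        /* ... activate memslots in [start_idx, end_idx) ... */
    }
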
Cédric Le Goater
d90014fc33 igb: Add Function Level Reset to PF and VF
The Intel 82576EB GbE Controller documentation says that the Physical
and Virtual Functions support Function Level Reset. Add the capability
to the PF device model using the device property "x-pcie-flr-init",
which is "on" by default and "off" for machines <= 8.1 to preserve
compatibility.

The FLR capability of the VF model is defined according to the FLR
property of the PF, to avoid adding an extra compatibility
property.

Cc: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
Fixes: 3a977deebe ("Intrdocue igb device emulation")
Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Tested-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Signed-off-by: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2023-11-13 15:33:37 +08:00
Cédric Le Goater
fe73674af1 igb: Add a VF reset handler
Export the igb_vf_reset() helper routine from the PF model to let the
IGBVF model implement its own device reset.

Cc: Akihiko Odaki <akihiko.odaki@daynix.com>
Suggested-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2023-11-13 15:33:37 +08:00
Thomas Huth
6ab4f1c9e2 block/snapshot: Fix compiler warning with -Wshadow=local
No need to declare a new variable in the inner code block
here; we can re-use the "ret" variable that has been declared
at the beginning of the function. With this change, the code
can now be successfully compiled with -Wshadow=local again.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-ID: <20231023175038.111607-1-thuth@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
[Commit message tweaked]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2023-11-13 07:41:35 +01:00
Helge Deller
881d1073d0 target/hppa: Mask reserved PSW bits in expand_sm_imm
The system mask is a restricted subset of the psw, with only
a couple of reserved bits.  It is better to handle this up
front in the translator than require helper_swap_system_mask
to use cpu_hppa_get_psw and cpu_hppa_put_psw.

Signed-off-by: Helge Deller <deller@gmx.de>
[rth: Handle this in expand_sm_imm not helper_swap_system_mask.]
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2023-11-12 09:01:22 -08:00
Stefan Hajnoczi
69680740ea Merge tag 'qdev-array-prop' of https://repo.or.cz/qemu/kevin into staging
qdev: Make array properties user accessible again

# -----BEGIN PGP SIGNATURE-----
#
# iQJFBAABCAAvFiEE3D3rFZqa+V09dFb+fwmycsiPL9YFAmVOZicRHGt3b2xmQHJl
# ZGhhdC5jb20ACgkQfwmycsiPL9arpw/+NKGRhSMrSq9Az+z5+ANUfw5SNLJYf1hH
# jm5ITA1Gr9htqHtBfEOdkms2wef6m7onF72rHVUlBKdqCPNMGLme5B0oQ8PZ1X1t
# OxAZ8KYwlO98QvOYl617SA/8wxc0U4/zi192kJpbRkKF6KdbbMGtLKjHyEitA/Yv
# izx1vkKOgQyMFGF1JgIyG4R3WmsKQW1XLqb3emVNRzCqmJpkvMJZQG8tnyEAXlIS
# gkY69cTpaKVaM1OxdB45gjlKTGzLWC/3tTGH+u8q356fvgm/QIgrokCirCZFPIl0
# C8hvzPm/L8hkvWtUb3EZx0DLiunWcAGvoLgBNODHojKRtQ6X9TRTrjJ41ZCLXVqv
# tVJm+XGKC0CZ/WW5yqVOmnzfPH4z8ubzSoRv5ryz3xDb5B/Zr10+ScE+/Ee24wJ2
# HIehxc1LgVGGpikP88/Ns/nAlIVUQxxYvSJ23R5D1+UpP6FCy6Y1pKyRtZGzPCIe
# N4Y+52GtelBR8gOjay5INn/Yf8Fh6sFxX556BW0XKYcbQgvl2bxASe/KVnAVZ1NB
# 8DsaAWlK+hPGopwyp2lDRuGd4kusNbzQvIUZ0mr1g9HQ/iSnT/9RFdExsj+K6QTr
# pX42QCe4mWHPAKx38cez+Bhx4TEOw+GmHuTp/oLdBRuY8DPu/I0Ny364uiW+At/R
# 8jF+jt5uVZc=
# =MV6O
# -----END PGP SIGNATURE-----
# gpg: Signature made Sat 11 Nov 2023 01:19:35 HKT
# gpg:                using RSA key DC3DEB159A9AF95D3D7456FE7F09B272C88F2FD6
# gpg:                issuer "kwolf@redhat.com"
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" [full]
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* tag 'qdev-array-prop' of https://repo.or.cz/qemu/kevin:
  qdev: Rework array properties based on list visitor
  qdev: Make netdev properties work as list elements
  qom: Add object_property_set_default_list()
  hw/rx/rx62n: Use qdev_prop_set_array()
  hw/arm/xlnx-versal: Use qdev_prop_set_array()
  hw/arm/virt: Use qdev_prop_set_array()
  hw/arm/vexpress: Use qdev_prop_set_array()
  hw/arm/sbsa-ref: Use qdev_prop_set_array()
  hw/arm/mps2: Use qdev_prop_set_array()
  hw/arm/mps2-tz: Use qdev_prop_set_array()
  hw/i386/pc: Use qdev_prop_set_array()

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2023-11-11 11:23:25 +08:00
113 changed files with 5618 additions and 625 deletions

View File: MAINTAINERS

@@ -131,6 +131,17 @@ K: ^Subject:.*(?i)mips
F: docs/system/target-mips.rst
F: configs/targets/mips*
X86 general architecture support
M: Paolo Bonzini <pbonzini@redhat.com>
S: Maintained
F: configs/devices/i386-softmmu/default.mak
F: configs/targets/i386-softmmu.mak
F: configs/targets/x86_64-softmmu.mak
F: docs/system/target-i386*
F: target/i386/*.[ch]
F: target/i386/Kconfig
F: target/i386/meson.build
Guest CPU cores (TCG)
---------------------
Overall TCG CPUs
@@ -657,6 +668,7 @@ F: include/hw/dma/pl080.h
F: hw/dma/pl330.c
F: hw/gpio/pl061.c
F: hw/input/pl050.c
F: include/hw/input/pl050.h
F: hw/intc/pl190.c
F: hw/sd/pl181.c
F: hw/ssi/pl022.c
@@ -927,6 +939,7 @@ F: hw/*/pxa2xx*
F: hw/display/tc6393xb.c
F: hw/gpio/max7310.c
F: hw/gpio/zaurus.c
F: hw/input/ads7846.c
F: hw/misc/mst_fpga.c
F: hw/adc/max111x.c
F: include/hw/adc/max111x.h
@@ -979,7 +992,9 @@ M: Peter Maydell <peter.maydell@linaro.org>
L: qemu-arm@nongnu.org
S: Maintained
F: hw/*/stellaris*
F: hw/display/ssd03*
F: include/hw/input/gamepad.h
F: include/hw/timer/stellaris-gptm.h
F: docs/system/arm/stellaris.rst
STM32VLDISCOVERY
@@ -994,6 +1009,7 @@ M: Peter Maydell <peter.maydell@linaro.org>
L: qemu-arm@nongnu.org
S: Maintained
F: hw/arm/vexpress.c
F: hw/display/sii9022.c
F: docs/system/arm/vexpress.rst
Versatile PB
@@ -2241,7 +2257,7 @@ M: Stefan Hajnoczi <stefanha@redhat.com>
S: Supported
F: hw/virtio/vhost-user-fs*
F: include/hw/virtio/vhost-user-fs.h
L: virtio-fs@redhat.com
L: virtio-fs@lists.linux.dev
virtio-input
M: Gerd Hoffmann <kraxel@redhat.com>

View File: VERSION

@@ -1 +1 @@
8.1.50
8.1.90

View File: accel/kvm/kvm-all.c

@@ -101,6 +101,8 @@ bool kvm_msi_use_devid;
bool kvm_has_guest_debug;
static int kvm_sstep_flags;
static bool kvm_immediate_exit;
static bool kvm_guest_memfd_supported;
static uint64_t kvm_supported_memory_attributes;
static hwaddr kvm_max_slot_size = ~0;
static const KVMCapabilityInfo kvm_required_capabilites[] = {
@@ -292,34 +294,69 @@ int kvm_physical_memory_addr_from_host(KVMState *s, void *ram,
static int kvm_set_user_memory_region(KVMMemoryListener *kml, KVMSlot *slot, bool new)
{
KVMState *s = kvm_state;
struct kvm_userspace_memory_region mem;
struct kvm_userspace_memory_region2 mem;
static int cap_user_memory2 = -1;
int ret;
if (cap_user_memory2 == -1) {
cap_user_memory2 = kvm_check_extension(s, KVM_CAP_USER_MEMORY2);
}
if (!cap_user_memory2 && slot->guest_memfd >= 0) {
error_report("%s, KVM doesn't support KVM_CAP_USER_MEMORY2,"
" which is required by guest memfd!", __func__);
exit(1);
}
mem.slot = slot->slot | (kml->as_id << 16);
mem.guest_phys_addr = slot->start_addr;
mem.userspace_addr = (unsigned long)slot->ram;
mem.flags = slot->flags;
mem.guest_memfd = slot->guest_memfd;
mem.guest_memfd_offset = slot->guest_memfd_offset;
if (slot->memory_size && !new && (mem.flags ^ slot->old_flags) & KVM_MEM_READONLY) {
/* Set the slot size to 0 before setting the slot to the desired
* value. This is needed based on KVM commit 75d61fbc. */
mem.memory_size = 0;
ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
if (cap_user_memory2) {
ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION2, &mem);
} else {
ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
}
if (ret < 0) {
goto err;
}
}
mem.memory_size = slot->memory_size;
ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
if (cap_user_memory2) {
ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION2, &mem);
} else {
ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
}
slot->old_flags = mem.flags;
err:
trace_kvm_set_user_memory(mem.slot, mem.flags, mem.guest_phys_addr,
mem.memory_size, mem.userspace_addr, ret);
trace_kvm_set_user_memory(mem.slot >> 16, (uint16_t)mem.slot, mem.flags,
mem.guest_phys_addr, mem.memory_size,
mem.userspace_addr, mem.guest_memfd,
mem.guest_memfd_offset, ret);
if (ret < 0) {
error_report("%s: KVM_SET_USER_MEMORY_REGION failed, slot=%d,"
" start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s",
__func__, mem.slot, slot->start_addr,
(uint64_t)mem.memory_size, strerror(errno));
if (cap_user_memory2) {
error_report("%s: KVM_SET_USER_MEMORY_REGION2 failed, slot=%d,"
" start=0x%" PRIx64 ", size=0x%" PRIx64 ","
" flags=0x%" PRIx32 ", guest_memfd=%" PRId32 ","
" guest_memfd_offset=0x%" PRIx64 ": %s",
__func__, mem.slot, slot->start_addr,
(uint64_t)mem.memory_size, mem.flags,
mem.guest_memfd, (uint64_t)mem.guest_memfd_offset,
strerror(errno));
} else {
error_report("%s: KVM_SET_USER_MEMORY_REGION failed, slot=%d,"
" start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s",
__func__, mem.slot, slot->start_addr,
(uint64_t)mem.memory_size, strerror(errno));
}
}
return ret;
}
@@ -391,6 +428,11 @@ static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id)
return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id);
}
int __attribute__ ((weak)) kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp)
{
return 0;
}
int kvm_init_vcpu(CPUState *cpu, Error **errp)
{
KVMState *s = kvm_state;
@@ -399,15 +441,27 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
/*
* tdx_pre_create_vcpu() may call cpu_x86_cpuid(). It in turn may call
* kvm_vm_ioctl(). Set cpu->kvm_state in advance to avoid NULL pointer
* dereference.
*/
cpu->kvm_state = s;
ret = kvm_arch_pre_create_vcpu(cpu, errp);
if (ret < 0) {
cpu->kvm_state = NULL;
goto err;
}
ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu));
if (ret < 0) {
error_setg_errno(errp, -ret, "kvm_init_vcpu: kvm_get_vcpu failed (%lu)",
kvm_arch_vcpu_id(cpu));
cpu->kvm_state = NULL;
goto err;
}
cpu->kvm_fd = ret;
cpu->kvm_state = s;
cpu->vcpu_dirty = true;
cpu->dirty_pages = 0;
cpu->throttle_us_per_full = 0;
@@ -475,6 +529,9 @@ static int kvm_mem_flags(MemoryRegion *mr)
if (readonly && kvm_readonly_mem_allowed) {
flags |= KVM_MEM_READONLY;
}
if (memory_region_has_guest_memfd(mr)) {
flags |= KVM_MEM_PRIVATE;
}
return flags;
}
@@ -1266,6 +1323,44 @@ void kvm_set_max_memslot_size(hwaddr max_slot_size)
kvm_max_slot_size = max_slot_size;
}
static int kvm_set_memory_attributes(hwaddr start, hwaddr size, uint64_t attr)
{
struct kvm_memory_attributes attrs;
int r;
attrs.attributes = attr;
attrs.address = start;
attrs.size = size;
attrs.flags = 0;
r = kvm_vm_ioctl(kvm_state, KVM_SET_MEMORY_ATTRIBUTES, &attrs);
if (r) {
warn_report("%s: failed to set memory (0x%lx+%#zx) with attr 0x%lx error '%s'",
__func__, start, size, attr, strerror(errno));
}
return r;
}
int kvm_set_memory_attributes_private(hwaddr start, hwaddr size)
{
if (!(kvm_supported_memory_attributes & KVM_MEMORY_ATTRIBUTE_PRIVATE)) {
error_report("KVM doesn't support PRIVATE memory attribute\n");
return -EINVAL;
}
return kvm_set_memory_attributes(start, size, KVM_MEMORY_ATTRIBUTE_PRIVATE);
}
int kvm_set_memory_attributes_shared(hwaddr start, hwaddr size)
{
if (!(kvm_supported_memory_attributes & KVM_MEMORY_ATTRIBUTE_PRIVATE)) {
error_report("KVM doesn't support PRIVATE memory attribute\n");
return -EINVAL;
}
return kvm_set_memory_attributes(start, size, 0);
}
/* Called with KVMMemoryListener.slots_lock held */
static void kvm_set_phys_mem(KVMMemoryListener *kml,
MemoryRegionSection *section, bool add)
@@ -1362,6 +1457,9 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
mem->ram_start_offset = ram_start_offset;
mem->ram = ram;
mem->flags = kvm_mem_flags(mr);
mem->guest_memfd = mr->ram_block->guest_memfd;
mem->guest_memfd_offset = (uint8_t*)ram - mr->ram_block->host;
kvm_slot_init_dirty_bitmap(mem);
err = kvm_set_user_memory_region(kml, mem, true);
if (err) {
@@ -1369,6 +1467,16 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
strerror(-err));
abort();
}
if (memory_region_is_default_private(mr)) {
err = kvm_set_memory_attributes_private(start_addr, slot_size);
if (err) {
error_report("%s: failed to set memory attribute private: %s\n",
__func__, strerror(-err));
exit(1);
}
}
start_addr += slot_size;
ram_start_offset += slot_size;
ram += slot_size;
@@ -2396,6 +2504,11 @@ static int kvm_init(MachineState *ms)
}
s->as = g_new0(struct KVMAs, s->nr_as);
kvm_guest_memfd_supported = kvm_check_extension(s, KVM_CAP_GUEST_MEMFD);
ret = kvm_check_extension(s, KVM_CAP_MEMORY_ATTRIBUTES);
kvm_supported_memory_attributes = ret > 0 ? ret : 0;
if (object_property_find(OBJECT(current_machine), "kvm-type")) {
g_autofree char *kvm_type = object_property_get_str(OBJECT(current_machine),
"kvm-type",
@@ -2816,6 +2929,79 @@ static void kvm_eat_signals(CPUState *cpu)
} while (sigismember(&chkset, SIG_IPI));
}
int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private)
{
MemoryRegionSection section;
ram_addr_t offset;
MemoryRegion *mr;
RAMBlock *rb;
void *addr;
int ret = -1;
trace_kvm_convert_memory(start, size, to_private ? "shared_to_private" : "private_to_shared");
section = memory_region_find(get_system_memory(), start, size);
mr = section.mr;
if (!mr) {
/*
* Ignore converting non-assigned region to shared.
*
* TDX requires vMMIO region to be shared to inject #VE to guest.
* OVMF issues conservatively MapGPA(shared) on 32bit PCI MMIO region,
* and vIO-APIC 0xFEC00000 4K page.
* OVMF assigns 32bit PCI MMIO region to
* [top of low memory: typically 2GB=0xC000000, 0xFC00000)
*/
if (!to_private) {
ret = 0;
}
return ret;
}
if (memory_region_has_guest_memfd(mr)) {
if (to_private) {
ret = kvm_set_memory_attributes_private(start, size);
} else {
ret = kvm_set_memory_attributes_shared(start, size);
}
if (ret) {
memory_region_unref(section.mr);
return ret;
}
addr = memory_region_get_ram_ptr(section.mr) +
section.offset_within_region;
rb = qemu_ram_block_from_host(addr, false, &offset);
memory_region_convert_mem_attr(&section, !to_private);
/*
* With KVM_SET_MEMORY_ATTRIBUTES by kvm_set_memory_attributes(),
* operation on underlying file descriptor is only for releasing
* unnecessary pages.
*/
ram_block_convert_range(rb, offset, size, to_private);
} else {
/*
* Because vMMIO region must be shared, guest TD may convert vMMIO
* region to shared explicitly. Don't complain such case. See
* memory_region_type() for checking if the region is MMIO region.
*/
if (!to_private &&
!memory_region_is_ram(mr) &&
!memory_region_is_ram_device(mr) &&
!memory_region_is_rom(mr) &&
!memory_region_is_romd(mr)) {
ret = 0;
} else {
warn_report("Convert non guest_memfd backed memory region "
"(0x%"HWADDR_PRIx" ,+ 0x%"HWADDR_PRIx") to %s",
start, size, to_private ? "private" : "shared");
}
}
memory_region_unref(section.mr);
return ret;
}
int kvm_cpu_exec(CPUState *cpu)
{
struct kvm_run *run = cpu->kvm_run;
@@ -2883,18 +3069,20 @@ int kvm_cpu_exec(CPUState *cpu)
ret = EXCP_INTERRUPT;
break;
}
fprintf(stderr, "error: kvm run failed %s\n",
strerror(-run_ret));
if (!(run_ret == -EFAULT && run->exit_reason == KVM_EXIT_MEMORY_FAULT)) {
fprintf(stderr, "error: kvm run failed %s\n",
strerror(-run_ret));
#ifdef TARGET_PPC
if (run_ret == -EBUSY) {
fprintf(stderr,
"This is probably because your SMT is enabled.\n"
"VCPU can only run on primary threads with all "
"secondary threads offline.\n");
}
if (run_ret == -EBUSY) {
fprintf(stderr,
"This is probably because your SMT is enabled.\n"
"VCPU can only run on primary threads with all "
"secondary threads offline.\n");
}
#endif
ret = -1;
break;
ret = -1;
break;
}
}
trace_kvm_run_exit(cpu->cpu_index, run->exit_reason);
@@ -2981,6 +3169,16 @@ int kvm_cpu_exec(CPUState *cpu)
break;
}
break;
case KVM_EXIT_MEMORY_FAULT:
if (run->memory_fault.flags & ~KVM_MEMORY_EXIT_FLAG_PRIVATE) {
error_report("KVM_EXIT_MEMORY_FAULT: Unknown flag 0x%" PRIx64,
(uint64_t)run->memory_fault.flags);
ret = -1;
break;
}
ret = kvm_convert_memory(run->memory_fault.gpa, run->memory_fault.size,
run->memory_fault.flags & KVM_MEMORY_EXIT_FLAG_PRIVATE);
break;
default:
DPRINTF("kvm_arch_handle_exit\n");
ret = kvm_arch_handle_exit(cpu, run);
@@ -4077,3 +4275,24 @@ void query_stats_schemas_cb(StatsSchemaList **result, Error **errp)
query_stats_schema_vcpu(first_cpu, &stats_args);
}
}
int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp)
{
int fd;
struct kvm_create_guest_memfd guest_memfd = {
.size = size,
.flags = flags,
};
if (!kvm_guest_memfd_supported) {
error_setg(errp, "KVM doesn't support guest memfd\n");
return -EOPNOTSUPP;
}
fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_GUEST_MEMFD, &guest_memfd);
if (fd < 0) {
error_setg_errno(errp, errno, "%s: error creating KVM guest memfd", __func__);
}
return fd;
}

View File

@@ -15,7 +15,7 @@ kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d"
kvm_irqchip_release_virq(int virq) "virq %d"
kvm_set_ioeventfd_mmio(int fd, uint64_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%" PRIx64 " val=0x%x assign: %d size: %d match: %d"
kvm_set_ioeventfd_pio(int fd, uint16_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%x val=0x%x assign: %d size: %d match: %d"
kvm_set_user_memory(uint32_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, int ret) "Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " ret=%d"
kvm_set_user_memory(uint16_t as, uint16_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, uint32_t fd, uint64_t fd_offset, int ret) "AddrSpace#%d Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " guest_memfd=%d" " guest_memfd_offset=0x%" PRIx64 " ret=%d"
kvm_clear_dirty_log(uint32_t slot, uint64_t start, uint32_t size) "slot#%"PRId32" start 0x%"PRIx64" size 0x%"PRIx32
kvm_resample_fd_notify(int gsi) "gsi %d"
kvm_dirty_ring_full(int id) "vcpu %d"
@@ -25,4 +25,4 @@ kvm_dirty_ring_reaper(const char *s) "%s"
kvm_dirty_ring_reap(uint64_t count, int64_t t) "reaped %"PRIu64" pages (took %"PRIi64" us)"
kvm_dirty_ring_reaper_kick(const char *reason) "%s"
kvm_dirty_ring_flush(int finished) "%d"
kvm_convert_memory(uint64_t start, uint64_t size, const char *msg) "start 0x%" PRIx64 " size 0x%" PRIx64 " %s"

View File

@@ -84,6 +84,7 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
ram_flags |= fb->readonly ? RAM_READONLY_FD : 0;
ram_flags |= fb->rom == ON_OFF_AUTO_ON ? RAM_READONLY : 0;
ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
ram_flags |= backend->require_guest_memfd ? RAM_GUEST_MEMFD : 0;
ram_flags |= fb->is_pmem ? RAM_PMEM : 0;
ram_flags |= RAM_NAMED_FILE;
memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), name,

View File

@@ -55,6 +55,7 @@ memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
name = host_memory_backend_get_name(backend);
ram_flags = backend->share ? RAM_SHARED : 0;
ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
ram_flags |= backend->require_guest_memfd ? RAM_GUEST_MEMFD : 0;
memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), name,
backend->size, ram_flags, fd, 0, errp);
g_free(name);

View File

@@ -30,6 +30,7 @@ ram_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
name = host_memory_backend_get_name(backend);
ram_flags = backend->share ? RAM_SHARED : 0;
ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
ram_flags |= backend->require_guest_memfd ? RAM_GUEST_MEMFD : 0;
memory_region_init_ram_flags_nomigrate(&backend->mr, OBJECT(backend), name,
backend->size, ram_flags, errp);
g_free(name);

View File

@@ -279,6 +279,7 @@ static void host_memory_backend_init(Object *obj)
/* TODO: convert access to globals to compat properties */
backend->merge = machine_mem_merge(machine);
backend->dump = machine_dump_guest_core(machine);
backend->require_guest_memfd = machine_require_guest_memfd(machine);
backend->reserve = true;
backend->prealloc_threads = machine->smp.cpus;
}

View File

@@ -629,7 +629,6 @@ int bdrv_all_goto_snapshot(const char *name,
while (iterbdrvs) {
BlockDriverState *bs = iterbdrvs->data;
AioContext *ctx = bdrv_get_aio_context(bs);
int ret = 0;
bool all_snapshots_includes_bs;
aio_context_acquire(ctx);
@@ -637,9 +636,8 @@ int bdrv_all_goto_snapshot(const char *name,
all_snapshots_includes_bs = bdrv_all_snapshots_includes_bs(bs);
bdrv_graph_rdunlock_main_loop();
if (devices || all_snapshots_includes_bs) {
ret = bdrv_snapshot_goto(bs, name, errp);
}
ret = (devices || all_snapshots_includes_bs) ?
bdrv_snapshot_goto(bs, name, errp) : 0;
aio_context_release(ctx);
if (ret < 0) {
bdrv_graph_rdlock_main_loop();

View File

@@ -18,6 +18,7 @@
#CONFIG_QXL=n
#CONFIG_SEV=n
#CONFIG_SGA=n
#CONFIG_TDX=n
#CONFIG_TEST_DEVICES=n
#CONFIG_TPM_CRB=n
#CONFIG_TPM_TIS_ISA=n

View File

@@ -668,11 +668,11 @@ suppressing it. More information on the file format can be found here:
https://github.com/google/sanitizers/wiki/ThreadSanitizerSuppressions
tests/tsan/blacklist.tsan - Has TSan warnings we wish to disable
tests/tsan/ignore.tsan - Has TSan warnings we wish to disable
at compile time for test or debug.
Add flags to configure to enable:
"--extra-cflags=-fsanitize-blacklist=<src path>/tests/tsan/blacklist.tsan"
"--extra-cflags=-fsanitize-blacklist=<src path>/tests/tsan/ignore.tsan"
More information on the file format can be found here under "Blacklist Format":

View File

@@ -515,7 +515,7 @@ class QAPIDocDirective(Directive):
except QAPIError as err:
# Launder QAPI parse errors into Sphinx extension errors
# so they are displayed nicely to the user
raise ExtensionError(str(err))
raise ExtensionError(str(err)) from err
def do_parse(self, rstlist, node):
"""Parse rST source lines and add them to the specified node

View File

@@ -38,6 +38,7 @@ Supported mechanisms
Currently supported confidential guest mechanisms are:
* AMD Secure Encrypted Virtualization (SEV) (see :doc:`i386/amd-memory-encryption`)
* Intel Trust Domain Extension (TDX) (see :doc:`i386/tdx`)
* POWER Protected Execution Facility (PEF) (see :ref:`power-papr-protected-execution-facility-pef`)
* s390x Protected Virtualization (PV) (see :doc:`s390x/protvirt`)

113
docs/system/i386/tdx.rst Normal file
View File

@@ -0,0 +1,113 @@
Intel Trust Domain Extensions (TDX)
===================================
Intel Trust Domain Extensions (TDX) refers to an Intel technology that extends
Virtual Machine Extensions (VMX) and Multi-Key Total Memory Encryption (MKTME)
with a new kind of virtual machine guest called a Trust Domain (TD). A TD runs
in a CPU mode that is designed to protect the confidentiality of its memory
contents and its CPU state from any other software, including the hosting
Virtual Machine Monitor (VMM), unless explicitly shared by the TD itself.
Prerequisites
-------------
To run a TD, the physical machine needs to have the TDX module loaded and
initialized, and the KVM hypervisor must support TDX and have it enabled. If those
requirements are met, ``KVM_CAP_VM_TYPES`` will report support for ``KVM_X86_TDX_VM``.
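A minimal userspace sketch (not QEMU code) of probing that capability is shown
below; it assumes TDX-enabled kernel headers that define ``KVM_CAP_VM_TYPES``
and ``KVM_X86_TDX_VM``::

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    int main(void)
    {
        int kvm = open("/dev/kvm", O_RDWR);
        if (kvm < 0) {
            perror("open /dev/kvm");
            return 1;
        }
        /* KVM_CHECK_EXTENSION returns a bitmap, one bit per VM type */
        int types = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_VM_TYPES);
        if (types > 0 && (types & (1 << KVM_X86_TDX_VM))) {
            printf("KVM supports TDX VMs\n");
        }
        return 0;
    }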
Trust Domain Virtual Firmware (TDVF)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Trust Domain Virtual Firmware (TDVF) is required to provide TD services to boot
the TD guest OS. TDVF needs to be copied into guest private memory and measured
before a TD boots.
The VM scope ``MEMORY_ENCRYPT_OP`` ioctl provides command ``KVM_TDX_INIT_MEM_REGION``
to copy the TDVF image to TD's private memory space.
Since TDX doesn't support read-only memslots, TDVF cannot be mapped as a pflash
device; it actually works as RAM, so the ``-bios`` option is used to load TDVF.
OVMF is the open-source firmware that implements TDVF support. Thus the
command line to specify and load TDVF is ``-bios OVMF.fd``
KVM private gmem
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
A TD's memory (RAM) needs to be convertible between private and shared, and its
BIOS (OVMF/TDVF) needs to be mapped as private. Thus QEMU needs to allocate
private gmem for them via KVM's ioctl (KVM_CREATE_GUEST_MEMFD), which requires
a KVM new enough to report KVM_CAP_GUEST_MEMFD.
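A sketch of what this allocation looks like at the ioctl level is shown below
(QEMU wraps it in a helper; ``vm_fd`` is assumed to be a VM file descriptor
obtained from ``KVM_CREATE_VM``, and error handling is elided)::

    struct kvm_create_guest_memfd gmem = {
        .size  = 4 * 1024 * 1024,   /* back 4MiB of guest private memory */
        .flags = 0,
    };
    int gmem_fd = ioctl(vm_fd, KVM_CREATE_GUEST_MEMFD, &gmem);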
Feature Control
---------------
Unlike a non-TDX VM, the CPU features (enumerated by CPUID or MSRs) of a TD are
not under full control of the VMM. The VMM can only configure a subset of a TD's
features via the ``KVM_TDX_INIT_VM`` command of the VM scope ``MEMORY_ENCRYPT_OP`` ioctl.
The configurable features have three types:
- Attributes:
- PKS (bit 30) controls whether Supervisor Protection Keys are exposed to the TD,
which determines the related CPUID bit and CR4 bit;
- PERFMON (bit 63) controls whether the PMU is exposed to the TD.
- XSAVE related features (XFAM):
XFAM is a 64-bit mask with the same format as XCR0 or the IA32_XSS MSR. It
determines the set of extended features available for use by the guest TD.
- CPUID features:
Only some bits of some CPUID leaves are directly configurable by the VMM.
Which features can be configured is reported via TDX capabilities (see the sketch below).
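As a small illustration of composing these values (the bit positions are the
architectural ones listed above; the macro names are invented for this
example), exposing PKS while keeping the PMU hidden could look like::

    #define TD_ATTR_PKS     (1ULL << 30)   /* Supervisor Protection Keys */
    #define TD_ATTR_PERFMON (1ULL << 63)   /* PMU exposure, left clear here */

    uint64_t attributes = TD_ATTR_PKS;
    /* XFAM uses the XCR0/IA32_XSS format: bit 0 = x87, bit 1 = SSE, ... */
    uint64_t xfam = 0x3;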
TDX capabilities
~~~~~~~~~~~~~~~~
The VM scope ``MEMORY_ENCRYPT_OP`` ioctl provides command ``KVM_TDX_CAPABILITIES``
to get the TDX capabilities from KVM. It returns a data structure of
``struct kvm_tdx_capabilities``, which describes the supported configuration of
attributes, XFAM and CPUID leaves.
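A rough sketch of issuing that command follows; the struct and field names
mirror the TDX kernel series' UAPI headers of the time and should be treated
as illustrative (``vm_fd`` again comes from ``KVM_CREATE_VM``)::

    struct kvm_tdx_capabilities *caps;
    struct kvm_tdx_cmd cmd = { .id = KVM_TDX_CAPABILITIES };

    /* the caller sizes the trailing CPUID config array */
    caps = calloc(1, sizeof(*caps) + 64 * sizeof(struct kvm_tdx_cpuid_config));
    caps->nr_cpuid_configs = 64;
    cmd.data = (uintptr_t)caps;

    if (ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd) == 0) {
        printf("attributes fixed0=0x%llx fixed1=0x%llx\n",
               caps->attrs_fixed0, caps->attrs_fixed1);
    }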
Launching a TD (TDX VM)
-----------------------
To launch a TDX guest, the following new command line options are required:
.. parsed-literal::
|qemu_system_x86| \\
-object tdx-guest,id=tdx0 \\
-machine ...,kernel-irqchip=split,confidential-guest-support=tdx0 \\
-bios OVMF.fd
Debugging
---------
Bit 0 of the TD attributes is the DEBUG bit, which decides whether the TD runs in
off-TD debug mode. When in off-TD debug mode, a TD's VCPU state and private memory
are accessible via given SEAMCALLs. This requires KVM to expose APIs to invoke those
SEAMCALLs and corresponding QEMU changes.
It's targeted as future work.
Restrictions
------------
- kernel-irqchip must be split;
- No read-only support for private memory;
- No SMM support: SMM support requires manipulating the guest register state,
which is not allowed;
Live Migration
--------------
TODO
References
----------
- `TDX Homepage <https://www.intel.com/content/www/us/en/developer/articles/technical/intel-trust-domain-extensions.html>`__

View File

@@ -29,6 +29,7 @@ Architectural features
i386/kvm-pv
i386/sgx
i386/amd-memory-encryption
i386/tdx
OS requirements
~~~~~~~~~~~~~~~

View File

@@ -96,6 +96,10 @@ uint64_t cpu_to_dump64(DumpState *s, uint64_t val)
static int dump_cleanup(DumpState *s)
{
if (s->dump_info.arch_cleanup_fn) {
s->dump_info.arch_cleanup_fn(s);
}
guest_phys_blocks_free(&s->guest_phys_blocks);
memory_mapping_list_free(&s->list);
close(s->fd);

View File

@@ -838,6 +838,19 @@ SRST
save to disk physical memory dump starting at *addr* of size *size*.
ERST
{
.name = "pmemclear",
.args_type = "val:l,size:i",
.params = "addr size",
.help = "clear physical memory range starting at 'addr' of size 'size'",
.cmd = hmp_pmemclear,
},
SRST
``pmemclear`` *addr* *size*
clear physical memory range starting at *addr* of size *size*.
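For example, clearing one 4KiB page starting at 1MiB might look like this
(hypothetical monitor session)::

    (qemu) pmemclear 0x100000 4096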
ERST
{
.name = "boot_set",
.args_type = "bootdevice:s",

View File

@@ -28,7 +28,7 @@ atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
static inline Int128 ATTRIBUTE_ATOMIC128_OPT
atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
{
__int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
Int128Aligned *ptr_align = __builtin_assume_aligned(ptr, 16);
Int128Alias r, c, n;
c.s = cmp;

View File

@@ -58,7 +58,7 @@ atomic16_read_rw(Int128 *ptr)
static inline void ATTRIBUTE_ATOMIC128_OPT
atomic16_set(Int128 *ptr, Int128 val)
{
__int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
Int128Aligned *ptr_align = __builtin_assume_aligned(ptr, 16);
__int128_t old;
Int128Alias new;

View File

@@ -576,7 +576,8 @@ static void fdt_add_gic_node(VirtMachineState *vms)
if (vms->virt) {
qemu_fdt_setprop_cells(ms->fdt, nodename, "interrupts",
GIC_FDT_IRQ_TYPE_PPI, ARCH_GIC_MAINT_IRQ,
GIC_FDT_IRQ_TYPE_PPI,
INTID_TO_PPI(ARCH_GIC_MAINT_IRQ),
GIC_FDT_IRQ_FLAGS_LEVEL_HI);
}
} else {
@@ -600,7 +601,8 @@ static void fdt_add_gic_node(VirtMachineState *vms)
2, vms->memmap[VIRT_GIC_VCPU].base,
2, vms->memmap[VIRT_GIC_VCPU].size);
qemu_fdt_setprop_cells(ms->fdt, nodename, "interrupts",
GIC_FDT_IRQ_TYPE_PPI, ARCH_GIC_MAINT_IRQ,
GIC_FDT_IRQ_TYPE_PPI,
INTID_TO_PPI(ARCH_GIC_MAINT_IRQ),
GIC_FDT_IRQ_FLAGS_LEVEL_HI);
}
}

View File

@@ -670,8 +670,13 @@ static void es1370_transfer_audio (ES1370State *s, struct chan *d, int loop_sel,
cnt += (transferred + d->leftover) >> 2;
if (s->sctl & loop_sel) {
/* Bah, how stupid is that having a 0 represent true value?
i just spent few hours on this shit */
/*
* loop_sel tells us which bit in the SCTL register to look at
* (either P1_LOOP_SEL, P2_LOOP_SEL or R1_LOOP_SEL). The sense
* of these bits is 0 for loop mode (set interrupt and keep recording
* when the sample count reaches zero) or 1 for stop mode (set
* interrupt and stop recording).
*/
AUD_log ("es1370: warning", "non looping mode\n");
} else {
d->frame_cnt = size;

View File

@@ -218,6 +218,16 @@ void hmp_pmemsave(Monitor *mon, const QDict *qdict)
hmp_handle_error(mon, err);
}
void hmp_pmemclear(Monitor *mon, const QDict *qdict)
{
uint32_t size = qdict_get_int(qdict, "size");
uint64_t addr = qdict_get_int(qdict, "val");
Error *err = NULL;
qmp_pmemclear(addr, size, &err);
hmp_handle_error(mon, err);
}
void hmp_system_wakeup(Monitor *mon, const QDict *qdict)
{
Error *err = NULL;

View File

@@ -35,7 +35,8 @@
GlobalProperty hw_compat_8_1[] = {
{ TYPE_PCI_BRIDGE, "x-pci-express-writeable-slt-bug", "true" },
{ "ramfb", "x-migrate", "off" },
{ "vfio-pci-nohotplug", "x-ramfb-migrate", "off" }
{ "vfio-pci-nohotplug", "x-ramfb-migrate", "off" },
{ "igb", "x-pcie-flr-init", "off" },
};
const size_t hw_compat_8_1_len = G_N_ELEMENTS(hw_compat_8_1);
@@ -1188,6 +1189,11 @@ bool machine_mem_merge(MachineState *machine)
return machine->mem_merge;
}
bool machine_require_guest_memfd(MachineState *machine)
{
return machine->require_guest_memfd;
}
static char *cpu_slot_to_string(const CPUArchId *cpu)
{
GString *s = g_string_new(NULL);

View File

@@ -336,8 +336,8 @@ static inline bool vmsvga_verify_rect(DisplaySurface *surface,
return false;
}
if (h > SVGA_MAX_HEIGHT) {
trace_vmware_verify_rect_greater_than_bound(name, "y", SVGA_MAX_HEIGHT,
y);
trace_vmware_verify_rect_greater_than_bound(name, "h", SVGA_MAX_HEIGHT,
h);
return false;
}
if (y + h > surface_height(surface)) {

View File

@@ -34,9 +34,10 @@
#include "net/net.h"
#include "qemu/log.h"
#define MIN_SEABIOS_HPPA_VERSION 10 /* require at least this fw version */
#define MIN_SEABIOS_HPPA_VERSION 12 /* require at least this fw version */
#define HPA_POWER_BUTTON (FIRMWARE_END - 0x10)
/* Power button address at &PAGE0->pad[4] */
#define HPA_POWER_BUTTON (0x40 + 4 * sizeof(uint32_t))
#define enable_lasi_lan() 0

View File

@@ -10,6 +10,11 @@ config SGX
bool
depends on KVM
config TDX
bool
select X86_FW_OVMF
depends on KVM
config PC
bool
imply APPLESMC
@@ -26,6 +31,7 @@ config PC
imply QXL
imply SEV
imply SGX
imply TDX
imply TEST_DEVICES
imply TPM_CRB
imply TPM_TIS_ISA

View File

@@ -975,7 +975,8 @@ static void build_dbg_aml(Aml *table)
aml_append(table, scope);
}
static Aml *build_link_dev(const char *name, uint8_t uid, Aml *reg)
static Aml *build_link_dev(const char *name, uint8_t uid, Aml *reg,
bool level_trigger_unsupported)
{
Aml *dev;
Aml *crs;
@@ -987,7 +988,10 @@ static Aml *build_link_dev(const char *name, uint8_t uid, Aml *reg)
aml_append(dev, aml_name_decl("_UID", aml_int(uid)));
crs = aml_resource_template();
aml_append(crs, aml_interrupt(AML_CONSUMER, AML_LEVEL, AML_ACTIVE_HIGH,
aml_append(crs, aml_interrupt(AML_CONSUMER,
level_trigger_unsupported ?
AML_EDGE : AML_LEVEL,
AML_ACTIVE_HIGH,
AML_SHARED, irqs, ARRAY_SIZE(irqs)));
aml_append(dev, aml_name_decl("_PRS", crs));
@@ -1011,7 +1015,8 @@ static Aml *build_link_dev(const char *name, uint8_t uid, Aml *reg)
return dev;
}
static Aml *build_gsi_link_dev(const char *name, uint8_t uid, uint8_t gsi)
static Aml *build_gsi_link_dev(const char *name, uint8_t uid,
uint8_t gsi, bool level_trigger_unsupported)
{
Aml *dev;
Aml *crs;
@@ -1024,7 +1029,10 @@ static Aml *build_gsi_link_dev(const char *name, uint8_t uid, uint8_t gsi)
crs = aml_resource_template();
irqs = gsi;
aml_append(crs, aml_interrupt(AML_CONSUMER, AML_LEVEL, AML_ACTIVE_HIGH,
aml_append(crs, aml_interrupt(AML_CONSUMER,
level_trigger_unsupported ?
AML_EDGE : AML_LEVEL,
AML_ACTIVE_HIGH,
AML_SHARED, &irqs, 1));
aml_append(dev, aml_name_decl("_PRS", crs));
@@ -1043,7 +1051,7 @@ static Aml *build_gsi_link_dev(const char *name, uint8_t uid, uint8_t gsi)
}
/* _CRS method - get current settings */
static Aml *build_iqcr_method(bool is_piix4)
static Aml *build_iqcr_method(bool is_piix4, bool level_trigger_unsupported)
{
Aml *if_ctx;
uint32_t irqs;
@@ -1051,7 +1059,9 @@ static Aml *build_iqcr_method(bool is_piix4)
Aml *crs = aml_resource_template();
irqs = 0;
aml_append(crs, aml_interrupt(AML_CONSUMER, AML_LEVEL,
aml_append(crs, aml_interrupt(AML_CONSUMER,
level_trigger_unsupported ?
AML_EDGE : AML_LEVEL,
AML_ACTIVE_HIGH, AML_SHARED, &irqs, 1));
aml_append(method, aml_name_decl("PRR0", crs));
@@ -1085,7 +1095,7 @@ static Aml *build_irq_status_method(void)
return method;
}
static void build_piix4_pci0_int(Aml *table)
static void build_piix4_pci0_int(Aml *table, bool level_trigger_unsupported)
{
Aml *dev;
Aml *crs;
@@ -1098,12 +1108,16 @@ static void build_piix4_pci0_int(Aml *table)
aml_append(sb_scope, pci0_scope);
aml_append(sb_scope, build_irq_status_method());
aml_append(sb_scope, build_iqcr_method(true));
aml_append(sb_scope, build_iqcr_method(true, level_trigger_unsupported));
aml_append(sb_scope, build_link_dev("LNKA", 0, aml_name("PRQ0")));
aml_append(sb_scope, build_link_dev("LNKB", 1, aml_name("PRQ1")));
aml_append(sb_scope, build_link_dev("LNKC", 2, aml_name("PRQ2")));
aml_append(sb_scope, build_link_dev("LNKD", 3, aml_name("PRQ3")));
aml_append(sb_scope, build_link_dev("LNKA", 0, aml_name("PRQ0"),
level_trigger_unsupported));
aml_append(sb_scope, build_link_dev("LNKB", 1, aml_name("PRQ1"),
level_trigger_unsupported));
aml_append(sb_scope, build_link_dev("LNKC", 2, aml_name("PRQ2"),
level_trigger_unsupported));
aml_append(sb_scope, build_link_dev("LNKD", 3, aml_name("PRQ3"),
level_trigger_unsupported));
dev = aml_device("LNKS");
{
@@ -1112,7 +1126,9 @@ static void build_piix4_pci0_int(Aml *table)
crs = aml_resource_template();
irqs = 9;
aml_append(crs, aml_interrupt(AML_CONSUMER, AML_LEVEL,
aml_append(crs, aml_interrupt(AML_CONSUMER,
level_trigger_unsupported ?
AML_EDGE : AML_LEVEL,
AML_ACTIVE_HIGH, AML_SHARED,
&irqs, 1));
aml_append(dev, aml_name_decl("_PRS", crs));
@@ -1198,7 +1214,7 @@ static Aml *build_q35_routing_table(const char *str)
return pkg;
}
static void build_q35_pci0_int(Aml *table)
static void build_q35_pci0_int(Aml *table, bool level_trigger_unsupported)
{
Aml *method;
Aml *sb_scope = aml_scope("_SB");
@@ -1237,25 +1253,41 @@ static void build_q35_pci0_int(Aml *table)
aml_append(sb_scope, pci0_scope);
aml_append(sb_scope, build_irq_status_method());
aml_append(sb_scope, build_iqcr_method(false));
aml_append(sb_scope, build_iqcr_method(false, level_trigger_unsupported));
aml_append(sb_scope, build_link_dev("LNKA", 0, aml_name("PRQA")));
aml_append(sb_scope, build_link_dev("LNKB", 1, aml_name("PRQB")));
aml_append(sb_scope, build_link_dev("LNKC", 2, aml_name("PRQC")));
aml_append(sb_scope, build_link_dev("LNKD", 3, aml_name("PRQD")));
aml_append(sb_scope, build_link_dev("LNKE", 4, aml_name("PRQE")));
aml_append(sb_scope, build_link_dev("LNKF", 5, aml_name("PRQF")));
aml_append(sb_scope, build_link_dev("LNKG", 6, aml_name("PRQG")));
aml_append(sb_scope, build_link_dev("LNKH", 7, aml_name("PRQH")));
aml_append(sb_scope, build_link_dev("LNKA", 0, aml_name("PRQA"),
level_trigger_unsupported));
aml_append(sb_scope, build_link_dev("LNKB", 1, aml_name("PRQB"),
level_trigger_unsupported));
aml_append(sb_scope, build_link_dev("LNKC", 2, aml_name("PRQC"),
level_trigger_unsupported));
aml_append(sb_scope, build_link_dev("LNKD", 3, aml_name("PRQD"),
level_trigger_unsupported));
aml_append(sb_scope, build_link_dev("LNKE", 4, aml_name("PRQE"),
level_trigger_unsupported));
aml_append(sb_scope, build_link_dev("LNKF", 5, aml_name("PRQF"),
level_trigger_unsupported));
aml_append(sb_scope, build_link_dev("LNKG", 6, aml_name("PRQG"),
level_trigger_unsupported));
aml_append(sb_scope, build_link_dev("LNKH", 7, aml_name("PRQH"),
level_trigger_unsupported));
aml_append(sb_scope, build_gsi_link_dev("GSIA", 0x10, 0x10));
aml_append(sb_scope, build_gsi_link_dev("GSIB", 0x11, 0x11));
aml_append(sb_scope, build_gsi_link_dev("GSIC", 0x12, 0x12));
aml_append(sb_scope, build_gsi_link_dev("GSID", 0x13, 0x13));
aml_append(sb_scope, build_gsi_link_dev("GSIE", 0x14, 0x14));
aml_append(sb_scope, build_gsi_link_dev("GSIF", 0x15, 0x15));
aml_append(sb_scope, build_gsi_link_dev("GSIG", 0x16, 0x16));
aml_append(sb_scope, build_gsi_link_dev("GSIH", 0x17, 0x17));
aml_append(sb_scope, build_gsi_link_dev("GSIA", 0x10, 0x10,
level_trigger_unsupported));
aml_append(sb_scope, build_gsi_link_dev("GSIB", 0x11, 0x11,
level_trigger_unsupported));
aml_append(sb_scope, build_gsi_link_dev("GSIC", 0x12, 0x12,
level_trigger_unsupported));
aml_append(sb_scope, build_gsi_link_dev("GSID", 0x13, 0x13,
level_trigger_unsupported));
aml_append(sb_scope, build_gsi_link_dev("GSIE", 0x14, 0x14,
level_trigger_unsupported));
aml_append(sb_scope, build_gsi_link_dev("GSIF", 0x15, 0x15,
level_trigger_unsupported));
aml_append(sb_scope, build_gsi_link_dev("GSIG", 0x16, 0x16,
level_trigger_unsupported));
aml_append(sb_scope, build_gsi_link_dev("GSIH", 0x17, 0x17,
level_trigger_unsupported));
aml_append(table, sb_scope);
}
@@ -1436,6 +1468,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
PCMachineState *pcms = PC_MACHINE(machine);
PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(machine);
X86MachineState *x86ms = X86_MACHINE(machine);
bool level_trigger_unsupported = x86ms->eoi_intercept_unsupported;
AcpiMcfgInfo mcfg;
bool mcfg_valid = !!acpi_get_mcfg(&mcfg);
uint32_t nr_mem = machine->ram_slots;
@@ -1468,7 +1501,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
if (pm->pcihp_bridge_en || pm->pcihp_root_en) {
build_x86_acpi_pci_hotplug(dsdt, pm->pcihp_io_base);
}
build_piix4_pci0_int(dsdt);
build_piix4_pci0_int(dsdt, level_trigger_unsupported);
} else if (q35) {
sb_scope = aml_scope("_SB");
dev = aml_device("PCI0");
@@ -1512,7 +1545,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
if (pm->pcihp_bridge_en) {
build_x86_acpi_pci_hotplug(dsdt, pm->pcihp_io_base);
}
build_q35_pci0_int(dsdt);
build_q35_pci0_int(dsdt, level_trigger_unsupported);
}
if (misc->has_hpet) {

View File

@@ -103,6 +103,7 @@ void acpi_build_madt(GArray *table_data, BIOSLinker *linker,
const CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(MACHINE(x86ms));
AcpiTable table = { .sig = "APIC", .rev = 3, .oem_id = oem_id,
.oem_table_id = oem_table_id };
bool level_trigger_unsupported = x86ms->eoi_intercept_unsupported;
acpi_table_begin(&table, table_data);
/* Local APIC Address */
@@ -122,18 +123,43 @@ void acpi_build_madt(GArray *table_data, BIOSLinker *linker,
IO_APIC_SECONDARY_ADDRESS, IO_APIC_SECONDARY_IRQBASE);
}
if (x86ms->apic_xrupt_override) {
build_xrupt_override(table_data, 0, 2,
0 /* Flags: Conforms to the specifications of the bus */);
}
for (i = 1; i < 16; i++) {
if (!(x86ms->pci_irq_mask & (1 << i))) {
/* No need for a INT source override structure. */
continue;
if (level_trigger_unsupported) {
/* Force edge trigger */
if (x86ms->apic_xrupt_override) {
build_xrupt_override(table_data, 0, 2,
/* Flags: active high, edge triggered */
1 | (1 << 2));
}
for (i = x86ms->apic_xrupt_override ? 1 : 0; i < 16; i++) {
build_xrupt_override(table_data, i, i,
/* Flags: active high, edge triggered */
1 | (1 << 2));
}
if (x86ms->ioapic2) {
for (i = 0; i < 16; i++) {
build_xrupt_override(table_data, IO_APIC_SECONDARY_IRQBASE + i,
IO_APIC_SECONDARY_IRQBASE + i,
/* Flags: active high, edge triggered */
1 | (1 << 2));
}
}
} else {
if (x86ms->apic_xrupt_override) {
build_xrupt_override(table_data, 0, 2,
0 /* Flags: Conforms to the specifications of the bus */);
}
for (i = 1; i < 16; i++) {
if (!(x86ms->pci_irq_mask & (1 << i))) {
/* No need for a INT source override structure. */
continue;
}
build_xrupt_override(table_data, i, i,
0xd /* Flags: Active high, Level Triggered */);
}
build_xrupt_override(table_data, i, i,
0xd /* Flags: Active high, Level Triggered */);
}
if (x2apic_mode) {

View File

@@ -27,6 +27,7 @@ i386_ss.add(when: 'CONFIG_PC', if_true: files(
'port92.c'))
i386_ss.add(when: 'CONFIG_X86_FW_OVMF', if_true: files('pc_sysfw_ovmf.c'),
if_false: files('pc_sysfw_ovmf-stubs.c'))
i386_ss.add(when: 'CONFIG_TDX', if_true: files('tdvf.c', 'tdvf-hob.c'))
subdir('kvm')
subdir('xen')

View File

@@ -43,6 +43,7 @@
#include "sysemu/xen.h"
#include "sysemu/reset.h"
#include "kvm/kvm_i386.h"
#include "kvm/tdx.h"
#include "hw/xen/xen.h"
#include "qapi/qmp/qlist.h"
#include "qemu/error-report.h"
@@ -1036,16 +1037,18 @@ void pc_memory_init(PCMachineState *pcms,
/* Initialize PC system firmware */
pc_system_firmware_init(pcms, rom_memory);
option_rom_mr = g_malloc(sizeof(*option_rom_mr));
memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
&error_fatal);
if (pcmc->pci_enabled) {
memory_region_set_readonly(option_rom_mr, true);
if (!is_tdx_vm()) {
option_rom_mr = g_malloc(sizeof(*option_rom_mr));
memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
&error_fatal);
if (pcmc->pci_enabled) {
memory_region_set_readonly(option_rom_mr, true);
}
memory_region_add_subregion_overlap(rom_memory,
PC_ROM_MIN_VGA,
option_rom_mr,
1);
}
memory_region_add_subregion_overlap(rom_memory,
PC_ROM_MIN_VGA,
option_rom_mr,
1);
fw_cfg = fw_cfg_arch_create(machine,
x86ms->boot_cpus, x86ms->apic_id_limit);
@@ -1755,11 +1758,6 @@ static void pc_machine_initfn(Object *obj)
cxl_machine_init(obj, &pcms->cxl_devices_state);
}
int pc_machine_kvm_type(MachineState *machine, const char *kvm_type)
{
return 0;
}
static void pc_machine_reset(MachineState *machine, ShutdownCause reason)
{
CPUState *cs;

View File

@@ -236,6 +236,10 @@ static void pc_q35_init(MachineState *machine)
x86ms->above_4g_mem_size, NULL);
object_property_set_bool(phb, PCI_HOST_BYPASS_IOMMU,
pcms->default_bus_bypass_iommu, NULL);
object_property_set_bool(phb, PCI_HOST_PROP_SMM_RANGES,
x86_machine_is_smm_enabled(x86ms), NULL);
object_property_set_bool(phb, PCI_HOST_PROP_PAM_MEMORY_AREA,
x86_machine_is_pam_enabled(x86ms), NULL);
sysbus_realize_and_unref(SYS_BUS_DEVICE(phb), &error_fatal);
/* pci */

View File

@@ -37,6 +37,7 @@
#include "hw/block/flash.h"
#include "sysemu/kvm.h"
#include "sev.h"
#include "kvm/tdx.h"
#define FLASH_SECTOR_SIZE 4096
@@ -265,5 +266,11 @@ void x86_firmware_configure(void *ptr, int size)
}
sev_encrypt_flash(ptr, size, &error_fatal);
} else if (is_tdx_vm()) {
ret = tdx_parse_tdvf(ptr, size);
if (ret) {
error_report("failed to parse TDVF for TDX VM");
exit(1);
}
}
}

213
hw/i386/tdvf-hob.c Normal file
View File

@@ -0,0 +1,213 @@
/*
* SPDX-License-Identifier: GPL-2.0-or-later
* Copyright (c) 2020 Intel Corporation
* Author: Isaku Yamahata <isaku.yamahata at gmail.com>
* <isaku.yamahata at intel.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License along
* with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include "qemu/log.h"
#include "qemu/error-report.h"
#include "e820_memory_layout.h"
#include "hw/i386/pc.h"
#include "hw/i386/x86.h"
#include "hw/pci/pcie_host.h"
#include "sysemu/kvm.h"
#include "standard-headers/uefi/uefi.h"
#include "tdvf-hob.h"
typedef struct TdvfHob {
hwaddr hob_addr;
void *ptr;
int size;
/* working area */
void *current;
void *end;
} TdvfHob;
static uint64_t tdvf_current_guest_addr(const TdvfHob *hob)
{
return hob->hob_addr + (hob->current - hob->ptr);
}
static void tdvf_align(TdvfHob *hob, size_t align)
{
hob->current = QEMU_ALIGN_PTR_UP(hob->current, align);
}
static void *tdvf_get_area(TdvfHob *hob, uint64_t size)
{
void *ret;
if (hob->current + size > hob->end) {
error_report("TD_HOB overrun, size = 0x%" PRIx64, size);
exit(1);
}
ret = hob->current;
hob->current += size;
tdvf_align(hob, 8);
return ret;
}
static void tdvf_hob_add_mmio_resource(TdvfHob *hob, uint64_t start,
uint64_t end)
{
EFI_HOB_RESOURCE_DESCRIPTOR *region;
if (!start) {
return;
}
region = tdvf_get_area(hob, sizeof(*region));
*region = (EFI_HOB_RESOURCE_DESCRIPTOR) {
.Header = {
.HobType = EFI_HOB_TYPE_RESOURCE_DESCRIPTOR,
.HobLength = cpu_to_le16(sizeof(*region)),
.Reserved = cpu_to_le32(0),
},
.Owner = EFI_HOB_OWNER_ZERO,
.ResourceType = cpu_to_le32(EFI_RESOURCE_MEMORY_MAPPED_IO),
.ResourceAttribute = cpu_to_le32(EFI_RESOURCE_ATTRIBUTE_TDVF_MMIO),
.PhysicalStart = cpu_to_le64(start),
.ResourceLength = cpu_to_le64(end - start),
};
}
static void tdvf_hob_add_mmio_resources(TdvfHob *hob)
{
MachineState *ms = MACHINE(qdev_get_machine());
X86MachineState *x86ms = X86_MACHINE(ms);
PCIHostState *pci_host;
uint64_t start, end;
uint64_t mcfg_base, mcfg_size;
Object *host;
/* Effectively PCI hole + other MMIO devices. */
tdvf_hob_add_mmio_resource(hob, x86ms->below_4g_mem_size,
APIC_DEFAULT_ADDRESS);
/* Stolen from acpi_get_i386_pci_host(), there's gotta be an easier way. */
pci_host = OBJECT_CHECK(PCIHostState,
object_resolve_path("/machine/i440fx", NULL),
TYPE_PCI_HOST_BRIDGE);
if (!pci_host) {
pci_host = OBJECT_CHECK(PCIHostState,
object_resolve_path("/machine/q35", NULL),
TYPE_PCI_HOST_BRIDGE);
}
g_assert(pci_host);
host = OBJECT(pci_host);
/* PCI hole above 4gb. */
start = object_property_get_uint(host, PCI_HOST_PROP_PCI_HOLE64_START,
NULL);
end = object_property_get_uint(host, PCI_HOST_PROP_PCI_HOLE64_END, NULL);
tdvf_hob_add_mmio_resource(hob, start, end);
/* MMCFG region */
mcfg_base = object_property_get_uint(host, PCIE_HOST_MCFG_BASE, NULL);
mcfg_size = object_property_get_uint(host, PCIE_HOST_MCFG_SIZE, NULL);
if (mcfg_base && mcfg_base != PCIE_BASE_ADDR_UNMAPPED && mcfg_size) {
tdvf_hob_add_mmio_resource(hob, mcfg_base, mcfg_base + mcfg_size);
}
}
static void tdvf_hob_add_memory_resources(TdxGuest *tdx, TdvfHob *hob)
{
EFI_HOB_RESOURCE_DESCRIPTOR *region;
EFI_RESOURCE_ATTRIBUTE_TYPE attr;
EFI_RESOURCE_TYPE resource_type;
TdxRamEntry *e;
int i;
for (i = 0; i < tdx->nr_ram_entries; i++) {
e = &tdx->ram_entries[i];
if (e->type == TDX_RAM_UNACCEPTED) {
resource_type = EFI_RESOURCE_MEMORY_UNACCEPTED;
attr = EFI_RESOURCE_ATTRIBUTE_TDVF_UNACCEPTED;
} else if (e->type == TDX_RAM_ADDED) {
resource_type = EFI_RESOURCE_SYSTEM_MEMORY;
attr = EFI_RESOURCE_ATTRIBUTE_TDVF_PRIVATE;
} else {
error_report("unknown TDX_RAM_ENTRY type %d", e->type);
exit(1);
}
region = tdvf_get_area(hob, sizeof(*region));
*region = (EFI_HOB_RESOURCE_DESCRIPTOR) {
.Header = {
.HobType = EFI_HOB_TYPE_RESOURCE_DESCRIPTOR,
.HobLength = cpu_to_le16(sizeof(*region)),
.Reserved = cpu_to_le32(0),
},
.Owner = EFI_HOB_OWNER_ZERO,
.ResourceType = cpu_to_le32(resource_type),
.ResourceAttribute = cpu_to_le32(attr),
.PhysicalStart = cpu_to_le64(e->address),
.ResourceLength = cpu_to_le64(e->length),
};
}
}
void tdvf_hob_create(TdxGuest *tdx, TdxFirmwareEntry *td_hob)
{
TdvfHob hob = {
.hob_addr = td_hob->address,
.size = td_hob->size,
.ptr = td_hob->mem_ptr,
.current = td_hob->mem_ptr,
.end = td_hob->mem_ptr + td_hob->size,
};
EFI_HOB_GENERIC_HEADER *last_hob;
EFI_HOB_HANDOFF_INFO_TABLE *hit;
/* Note, Efi{Free}Memory{Bottom,Top} are ignored, leave 'em zeroed. */
hit = tdvf_get_area(&hob, sizeof(*hit));
*hit = (EFI_HOB_HANDOFF_INFO_TABLE) {
.Header = {
.HobType = EFI_HOB_TYPE_HANDOFF,
.HobLength = cpu_to_le16(sizeof(*hit)),
.Reserved = cpu_to_le32(0),
},
.Version = cpu_to_le32(EFI_HOB_HANDOFF_TABLE_VERSION),
.BootMode = cpu_to_le32(0),
.EfiMemoryTop = cpu_to_le64(0),
.EfiMemoryBottom = cpu_to_le64(0),
.EfiFreeMemoryTop = cpu_to_le64(0),
.EfiFreeMemoryBottom = cpu_to_le64(0),
.EfiEndOfHobList = cpu_to_le64(0), /* initialized later */
};
tdvf_hob_add_memory_resources(tdx, &hob);
tdvf_hob_add_mmio_resources(&hob);
last_hob = tdvf_get_area(&hob, sizeof(*last_hob));
*last_hob = (EFI_HOB_GENERIC_HEADER) {
.HobType = EFI_HOB_TYPE_END_OF_HOB_LIST,
.HobLength = cpu_to_le16(sizeof(*last_hob)),
.Reserved = cpu_to_le32(0),
};
hit->EfiEndOfHobList = tdvf_current_guest_addr(&hob);
}

24
hw/i386/tdvf-hob.h Normal file
View File

@@ -0,0 +1,24 @@
#ifndef HW_I386_TD_HOB_H
#define HW_I386_TD_HOB_H
#include "hw/i386/tdvf.h"
#include "target/i386/kvm/tdx.h"
void tdvf_hob_create(TdxGuest *tdx, TdxFirmwareEntry *td_hob);
#define EFI_RESOURCE_ATTRIBUTE_TDVF_PRIVATE \
(EFI_RESOURCE_ATTRIBUTE_PRESENT | \
EFI_RESOURCE_ATTRIBUTE_INITIALIZED | \
EFI_RESOURCE_ATTRIBUTE_TESTED)
#define EFI_RESOURCE_ATTRIBUTE_TDVF_UNACCEPTED \
(EFI_RESOURCE_ATTRIBUTE_PRESENT | \
EFI_RESOURCE_ATTRIBUTE_INITIALIZED | \
EFI_RESOURCE_ATTRIBUTE_TESTED)
#define EFI_RESOURCE_ATTRIBUTE_TDVF_MMIO \
(EFI_RESOURCE_ATTRIBUTE_PRESENT | \
EFI_RESOURCE_ATTRIBUTE_INITIALIZED | \
EFI_RESOURCE_ATTRIBUTE_UNCACHEABLE)
#endif

200
hw/i386/tdvf.c Normal file
View File

@@ -0,0 +1,200 @@
/*
* SPDX-License-Identifier: GPL-2.0-or-later
* Copyright (c) 2020 Intel Corporation
* Author: Isaku Yamahata <isaku.yamahata at gmail.com>
* <isaku.yamahata at intel.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License along
* with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include "qemu/error-report.h"
#include "hw/i386/pc.h"
#include "hw/i386/tdvf.h"
#include "sysemu/kvm.h"
#define TDX_METADATA_OFFSET_GUID "e47a6535-984a-4798-865e-4685a7bf8ec2"
#define TDX_METADATA_VERSION 1
#define TDVF_SIGNATURE 0x46564454 /* TDVF as little endian */
typedef struct {
uint32_t DataOffset;
uint32_t RawDataSize;
uint64_t MemoryAddress;
uint64_t MemoryDataSize;
uint32_t Type;
uint32_t Attributes;
} TdvfSectionEntry;
typedef struct {
uint32_t Signature;
uint32_t Length;
uint32_t Version;
uint32_t NumberOfSectionEntries;
TdvfSectionEntry SectionEntries[];
} TdvfMetadata;
struct tdx_metadata_offset {
uint32_t offset;
};
static TdvfMetadata *tdvf_get_metadata(void *flash_ptr, int size)
{
TdvfMetadata *metadata;
uint32_t offset = 0;
uint8_t *data;
if ((uint32_t) size != size) {
return NULL;
}
if (pc_system_ovmf_table_find(TDX_METADATA_OFFSET_GUID, &data, NULL)) {
offset = size - le32_to_cpu(((struct tdx_metadata_offset *)data)->offset);
if (offset + sizeof(*metadata) > size) {
return NULL;
}
} else {
error_report("Cannot find TDX_METADATA_OFFSET_GUID");
return NULL;
}
metadata = flash_ptr + offset;
/* Finally, verify the signature to determine if this is a TDVF image. */
metadata->Signature = le32_to_cpu(metadata->Signature);
if (metadata->Signature != TDVF_SIGNATURE) {
error_report("Invalid TDVF signature in metadata!");
return NULL;
}
/* Sanity check that the TDVF doesn't overlap its own metadata. */
metadata->Length = le32_to_cpu(metadata->Length);
if (offset + metadata->Length > size) {
return NULL;
}
/* Only version 1 is supported/defined. */
metadata->Version = le32_to_cpu(metadata->Version);
if (metadata->Version != TDX_METADATA_VERSION) {
return NULL;
}
return metadata;
}
static int tdvf_parse_and_check_section_entry(const TdvfSectionEntry *src,
TdxFirmwareEntry *entry)
{
entry->data_offset = le32_to_cpu(src->DataOffset);
entry->data_len = le32_to_cpu(src->RawDataSize);
entry->address = le64_to_cpu(src->MemoryAddress);
entry->size = le64_to_cpu(src->MemoryDataSize);
entry->type = le32_to_cpu(src->Type);
entry->attributes = le32_to_cpu(src->Attributes);
/* sanity check */
if (entry->size < entry->data_len) {
error_report("Broken metadata RawDataSize 0x%x MemoryDataSize 0x%lx",
entry->data_len, entry->size);
return -1;
}
if (!QEMU_IS_ALIGNED(entry->address, TARGET_PAGE_SIZE)) {
error_report("MemoryAddress 0x%lx not page aligned", entry->address);
return -1;
}
if (!QEMU_IS_ALIGNED(entry->size, TARGET_PAGE_SIZE)) {
error_report("MemoryDataSize 0x%lx not page aligned", entry->size);
return -1;
}
switch (entry->type) {
case TDVF_SECTION_TYPE_BFV:
case TDVF_SECTION_TYPE_CFV:
/* Sections that must be copied from the firmware image to TD memory */
if (entry->data_len == 0) {
error_report("%d section with RawDataSize == 0", entry->type);
return -1;
}
break;
case TDVF_SECTION_TYPE_TD_HOB:
case TDVF_SECTION_TYPE_TEMP_MEM:
/* Sections that need not be copied from the firmware image */
if (entry->data_len != 0) {
error_report("%d section with RawDataSize 0x%x != 0",
entry->type, entry->data_len);
return -1;
}
break;
default:
error_report("TDVF contains unsupported section type %d", entry->type);
return -1;
}
return 0;
}
int tdvf_parse_metadata(TdxFirmware *fw, void *flash_ptr, int size)
{
TdvfSectionEntry *sections;
TdvfMetadata *metadata;
ssize_t entries_size;
uint32_t len, i;
metadata = tdvf_get_metadata(flash_ptr, size);
if (!metadata) {
return -EINVAL;
}
/* load and parse metadata entries */
fw->nr_entries = le32_to_cpu(metadata->NumberOfSectionEntries);
if (fw->nr_entries < 2) {
error_report("Invalid number of fw entries (%u) in TDVF", fw->nr_entries);
return -EINVAL;
}
len = le32_to_cpu(metadata->Length);
entries_size = fw->nr_entries * sizeof(TdvfSectionEntry);
if (len != sizeof(*metadata) + entries_size) {
error_report("TDVF metadata len (0x%x) mismatch, expected (0x%x)",
len, (uint32_t)(sizeof(*metadata) + entries_size));
return -EINVAL;
}
fw->entries = g_new(TdxFirmwareEntry, fw->nr_entries);
sections = g_new(TdvfSectionEntry, fw->nr_entries);
memcpy(sections, (void *)metadata + sizeof(*metadata), entries_size);
for (i = 0; i < fw->nr_entries; i++) {
if (tdvf_parse_and_check_section_entry(&sections[i], &fw->entries[i])) {
goto err;
}
}
g_free(sections);
fw->mem_ptr = flash_ptr;
return 0;
err:
g_free(sections);
g_free(fw->entries);
fw->entries = NULL;
return -EINVAL;
}

View File

@@ -47,6 +47,7 @@
#include "hw/intc/i8259.h"
#include "hw/rtc/mc146818rtc.h"
#include "target/i386/sev.h"
#include "kvm/tdx.h"
#include "hw/acpi/cpu_hotplug.h"
#include "hw/irq.h"
@@ -1145,9 +1146,17 @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware,
(bios_size % 65536) != 0) {
goto bios_error;
}
bios = g_malloc(sizeof(*bios));
memory_region_init_ram(bios, NULL, "pc.bios", bios_size, &error_fatal);
if (sev_enabled()) {
if (is_tdx_vm()) {
memory_region_init_ram_guest_memfd(bios, NULL, "pc.bios", bios_size,
&error_fatal);
tdx_set_tdvf_region(bios);
} else {
memory_region_init_ram(bios, NULL, "pc.bios", bios_size, &error_fatal);
}
if (sev_enabled() || is_tdx_vm()) {
/*
* The concept of a "reset" simply doesn't exist for
* confidential computing guests, we have to destroy and
@@ -1169,17 +1178,20 @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware,
}
g_free(filename);
/* map the last 128KB of the BIOS in ISA space */
isa_bios_size = MIN(bios_size, 128 * KiB);
isa_bios = g_malloc(sizeof(*isa_bios));
memory_region_init_alias(isa_bios, NULL, "isa-bios", bios,
bios_size - isa_bios_size, isa_bios_size);
memory_region_add_subregion_overlap(rom_memory,
0x100000 - isa_bios_size,
isa_bios,
1);
if (!isapc_ram_fw) {
memory_region_set_readonly(isa_bios, true);
/* For TDX, aliasing different GPAs to the same private memory is not supported */
if (!is_tdx_vm()) {
/* map the last 128KB of the BIOS in ISA space */
isa_bios_size = MIN(bios_size, 128 * KiB);
isa_bios = g_malloc(sizeof(*isa_bios));
memory_region_init_alias(isa_bios, NULL, "isa-bios", bios,
bios_size - isa_bios_size, isa_bios_size);
memory_region_add_subregion_overlap(rom_memory,
0x100000 - isa_bios_size,
isa_bios,
1);
if (!isapc_ram_fw) {
memory_region_set_readonly(isa_bios, true);
}
}
/* map all the bios at the top of memory */
@@ -1294,6 +1306,49 @@ static void x86_machine_set_pic(Object *obj, Visitor *v, const char *name,
visit_type_OnOffAuto(v, name, &x86ms->pic, errp);
}
bool x86_machine_is_pam_enabled(const X86MachineState *x86ms)
{
bool pam_available = false;
if (x86ms->pam == ON_OFF_AUTO_OFF) {
return false;
}
if (tcg_enabled() || qtest_enabled()) {
pam_available = true;
} else if (kvm_enabled()) {
pam_available = kvm_has_pam();
}
if (pam_available) {
return true;
}
if (x86ms->pam == ON_OFF_AUTO_ON) {
error_report("Programmable Attribute Map(PAM) Memory Area not supported "
"by this hypervisor.");
exit(1);
}
return false;
}
static void x86_machine_get_pam(Object *obj, Visitor *v, const char *name,
void *opaque, Error **errp)
{
X86MachineState *x86ms = X86_MACHINE(obj);
OnOffAuto pam = x86ms->pam;
visit_type_OnOffAuto(v, name, &pam, errp);
}
static void x86_machine_set_pam(Object *obj, Visitor *v, const char *name,
void *opaque, Error **errp)
{
X86MachineState *x86ms = X86_MACHINE(obj);
visit_type_OnOffAuto(v, name, &x86ms->pam, errp);
}
static char *x86_machine_get_oem_id(Object *obj, Error **errp)
{
X86MachineState *x86ms = X86_MACHINE(obj);
@@ -1377,6 +1432,17 @@ static void machine_set_sgx_epc(Object *obj, Visitor *v, const char *name,
qapi_free_SgxEPCList(list);
}
static int x86_kvm_type(MachineState *ms, const char *vm_type)
{
X86MachineState *x86ms = X86_MACHINE(ms);
int kvm_type;
kvm_type = kvm_get_vm_type(ms, vm_type);
x86ms->vm_type = kvm_type;
return kvm_type;
}
static void x86_machine_initfn(Object *obj)
{
X86MachineState *x86ms = X86_MACHINE(obj);
@@ -1390,6 +1456,7 @@ static void x86_machine_initfn(Object *obj)
x86ms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8);
x86ms->bus_lock_ratelimit = 0;
x86ms->above_4g_mem_start = 4 * GiB;
x86ms->eoi_intercept_unsupported = false;
}
static void x86_machine_class_init(ObjectClass *oc, void *data)
@@ -1401,6 +1468,7 @@ static void x86_machine_class_init(ObjectClass *oc, void *data)
mc->cpu_index_to_instance_props = x86_cpu_index_to_props;
mc->get_default_cpu_node_id = x86_get_default_cpu_node_id;
mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids;
mc->kvm_type = x86_kvm_type;
x86mc->save_tsc_khz = true;
x86mc->fwcfg_dma_enabled = true;
nc->nmi_monitor_handler = x86_nmi;
@@ -1431,6 +1499,12 @@ static void x86_machine_class_init(ObjectClass *oc, void *data)
object_class_property_set_description(oc, X86_MACHINE_PIC,
"Enable i8259 PIC");
object_class_property_add(oc, X86_MACHINE_PAM, "OnOffAuto",
x86_machine_get_pam, x86_machine_set_pam,
NULL, NULL);
object_class_property_set_description(oc, X86_MACHINE_PAM,
"Enable PAM");
object_class_property_add_str(oc, X86_MACHINE_OEM_ID,
x86_machine_get_oem_id,
x86_machine_set_oem_id);

View File

@@ -235,6 +235,15 @@ void apic_designate_bsp(DeviceState *dev, bool bsp)
}
}
void apic_force_x2apic(DeviceState *dev)
{
if (dev == NULL) {
return;
}
APIC_COMMON(dev)->force_x2apic = true;
}
static void apic_reset_common(DeviceState *dev)
{
APICCommonState *s = APIC_COMMON(dev);
@@ -243,6 +252,9 @@ static void apic_reset_common(DeviceState *dev)
bsp = s->apicbase & MSR_IA32_APICBASE_BSP;
s->apicbase = APIC_DEFAULT_ADDRESS | bsp | MSR_IA32_APICBASE_ENABLE;
if (s->force_x2apic) {
s->apicbase |= MSR_IA32_APICBASE_EXTD;
}
s->id = s->initial_apic_id;
kvm_reset_irq_delivered();

View File

@@ -46,6 +46,7 @@ config LOONGSON3V
select PCI_EXPRESS_GENERIC_BRIDGE
select MSI_NONBROKEN
select FW_CFG_MIPS
select UNIMP
config MIPS_CPS
bool

View File

@@ -78,6 +78,7 @@ struct IGBState {
uint32_t ioaddr;
IGBCore core;
bool has_flr;
};
#define IGB_CAP_SRIOV_OFFSET (0x160)
@@ -101,6 +102,9 @@ static void igb_write_config(PCIDevice *dev, uint32_t addr,
trace_igb_write_config(addr, val, len);
pci_default_write_config(dev, addr, val, len);
if (s->has_flr) {
pcie_cap_flr_write_config(dev, addr, val, len);
}
if (range_covers_byte(addr, len, PCI_COMMAND) &&
(dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
@@ -122,6 +126,12 @@ igb_mmio_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
igb_core_write(&s->core, addr, val, size);
}
void igb_vf_reset(void *opaque, uint16_t vfn)
{
IGBState *s = opaque;
igb_core_vf_reset(&s->core, vfn);
}
static bool
igb_io_get_reg_index(IGBState *s, uint32_t *idx)
{
@@ -427,6 +437,10 @@ static void igb_pci_realize(PCIDevice *pci_dev, Error **errp)
}
/* PCIe extended capabilities (in order) */
if (s->has_flr) {
pcie_cap_flr_init(pci_dev);
}
if (pcie_aer_init(pci_dev, 1, 0x100, 0x40, errp) < 0) {
hw_error("Failed to initialize AER capability");
}
@@ -582,6 +596,7 @@ static const VMStateDescription igb_vmstate = {
static Property igb_properties[] = {
DEFINE_NIC_PROPERTIES(IGBState, conf),
DEFINE_PROP_BOOL("x-pcie-flr-init", IGBState, has_flr, true),
DEFINE_PROP_END_OF_LIST(),
};

View File

@@ -152,5 +152,6 @@ enum {
uint64_t igb_mmio_read(void *opaque, hwaddr addr, unsigned size);
void igb_mmio_write(void *opaque, hwaddr addr, uint64_t val, unsigned size);
void igb_vf_reset(void *opaque, uint16_t vfn);
#endif

View File

@@ -2477,11 +2477,13 @@ static void igb_set_vfmailbox(IGBCore *core, int index, uint32_t val)
}
}
static void igb_vf_reset(IGBCore *core, uint16_t vfn)
void igb_core_vf_reset(IGBCore *core, uint16_t vfn)
{
uint16_t qn0 = vfn;
uint16_t qn1 = vfn + IGB_NUM_VM_POOLS;
trace_igb_core_vf_reset(vfn);
/* disable Rx and Tx for the VF*/
core->mac[RXDCTL0 + (qn0 * 16)] &= ~E1000_RXDCTL_QUEUE_ENABLE;
core->mac[RXDCTL0 + (qn1 * 16)] &= ~E1000_RXDCTL_QUEUE_ENABLE;
@@ -2560,7 +2562,7 @@ static void igb_set_vtctrl(IGBCore *core, int index, uint32_t val)
if (val & E1000_CTRL_RST) {
vfn = (index - PVTCTRL0) / 0x40;
igb_vf_reset(core, vfn);
igb_core_vf_reset(core, vfn);
}
}

View File

@@ -130,6 +130,9 @@ igb_core_set_link_status(IGBCore *core);
void
igb_core_pci_uninit(IGBCore *core);
void
igb_core_vf_reset(IGBCore *core, uint16_t vfn);
bool
igb_can_receive(IGBCore *core);

View File

@@ -204,6 +204,10 @@ static void igbvf_write_config(PCIDevice *dev, uint32_t addr, uint32_t val,
{
trace_igbvf_write_config(addr, val, len);
pci_default_write_config(dev, addr, val, len);
if (object_property_get_bool(OBJECT(pcie_sriov_get_pf(dev)),
"x-pcie-flr-init", &error_abort)) {
pcie_cap_flr_write_config(dev, addr, val, len);
}
}
static uint64_t igbvf_mmio_read(void *opaque, hwaddr addr, unsigned size)
@@ -266,6 +270,11 @@ static void igbvf_pci_realize(PCIDevice *dev, Error **errp)
hw_error("Failed to initialize PCIe capability");
}
if (object_property_get_bool(OBJECT(pcie_sriov_get_pf(dev)),
"x-pcie-flr-init", &error_abort)) {
pcie_cap_flr_init(dev);
}
if (pcie_aer_init(dev, 1, 0x100, 0x40, errp) < 0) {
hw_error("Failed to initialize AER capability");
}
@@ -273,6 +282,13 @@ static void igbvf_pci_realize(PCIDevice *dev, Error **errp)
pcie_ari_init(dev, 0x150);
}
static void igbvf_qdev_reset_hold(Object *obj)
{
PCIDevice *vf = PCI_DEVICE(obj);
igb_vf_reset(pcie_sriov_get_pf(vf), pcie_sriov_vf_number(vf));
}
static void igbvf_pci_uninit(PCIDevice *dev)
{
IgbVfState *s = IGBVF(dev);
@@ -287,6 +303,7 @@ static void igbvf_class_init(ObjectClass *class, void *data)
{
DeviceClass *dc = DEVICE_CLASS(class);
PCIDeviceClass *c = PCI_DEVICE_CLASS(class);
ResettableClass *rc = RESETTABLE_CLASS(class);
c->realize = igbvf_pci_realize;
c->exit = igbvf_pci_uninit;
@@ -295,6 +312,8 @@ static void igbvf_class_init(ObjectClass *class, void *data)
c->revision = 1;
c->class_id = PCI_CLASS_NETWORK_ETHERNET;
rc->phases.hold = igbvf_qdev_reset_hold;
dc->desc = "Intel 82576 Virtual Function";
dc->user_creatable = false;

View File

@@ -274,6 +274,7 @@ igb_core_mdic_read(uint32_t addr, uint32_t data) "MDIC READ: PHY[%u] = 0x%x"
igb_core_mdic_read_unhandled(uint32_t addr) "MDIC READ: PHY[%u] UNHANDLED"
igb_core_mdic_write(uint32_t addr, uint32_t data) "MDIC WRITE: PHY[%u] = 0x%x"
igb_core_mdic_write_unhandled(uint32_t addr) "MDIC WRITE: PHY[%u] UNHANDLED"
igb_core_vf_reset(uint16_t vfn) "VF%d"
igb_link_set_ext_params(bool asd_check, bool speed_select_bypass, bool pfrstd) "Set extended link params: ASD check: %d, Speed select bypass: %d, PF reset done: %d"

View File

@@ -375,7 +375,8 @@ static void fw_cfg_dma_transfer(FWCfgState *s)
} else {
dma.length = 0;
}
trace_fw_cfg_transfer(dma.address, dma.length, arch,
(s->cur_entry & FW_CFG_ENTRY_MASK), read, write);
dma.control = 0;
while (dma.length > 0 && !(dma.control & FW_CFG_DMA_CTL_ERROR)) {

View File

@@ -13,6 +13,7 @@ fw_cfg_add_string(uint16_t key_value, const char *key_name, const char *value) "
fw_cfg_add_i16(uint16_t key_value, const char *key_name, uint16_t value) "key 0x%04" PRIx16 " '%s', value 0x%" PRIx16
fw_cfg_add_i32(uint16_t key_value, const char *key_name, uint32_t value) "key 0x%04" PRIx16 " '%s', value 0x%" PRIx32
fw_cfg_add_i64(uint16_t key_value, const char *key_name, uint64_t value) "key 0x%04" PRIx16 " '%s', value 0x%" PRIx64
fw_cfg_transfer(uint64_t dma_addr, uint64_t length, int arch, uint64_t entry, int read, int write) "dma 0x%" PRIx64 " length 0x%" PRIx64 " arch %d entry 0x%" PRIx64 " read %d write %d"
# mac_nvram.c
macio_nvram_read(uint32_t addr, uint8_t val) "read addr=0x%04"PRIx32" val=0x%02x"

View File

@@ -32,6 +32,7 @@
#include "hw/pci-host/astro.h"
#include "hw/hppa/hppa_hardware.h"
#include "migration/vmstate.h"
#include "target/hppa/cpu.h"
#include "trace.h"
#include "qom/object.h"
@@ -268,22 +269,6 @@ static const MemoryRegionOps elroy_config_addr_ops = {
};
/*
* A subroutine of astro_translate_iommu that builds an IOMMUTLBEntry using the
* given translated address and mask.
*/
static bool make_iommu_tlbe(hwaddr addr, hwaddr taddr, hwaddr mask,
IOMMUTLBEntry *ret)
{
hwaddr tce_mask = ~((1ull << 12) - 1);
ret->target_as = &address_space_memory;
ret->iova = addr & tce_mask;
ret->translated_addr = taddr & tce_mask;
ret->addr_mask = ~tce_mask;
ret->perm = IOMMU_RW;
return true;
}
/* Handle PCI-to-system address translation. */
static IOMMUTLBEntry astro_translate_iommu(IOMMUMemoryRegion *iommu,
hwaddr addr,
@@ -291,53 +276,59 @@ static IOMMUTLBEntry astro_translate_iommu(IOMMUMemoryRegion *iommu,
int iommu_idx)
{
AstroState *s = container_of(iommu, AstroState, iommu);
IOMMUTLBEntry ret = {
.target_as = &address_space_memory,
.iova = addr,
.translated_addr = 0,
.addr_mask = ~(hwaddr)0,
.perm = IOMMU_NONE,
};
hwaddr pdir_ptr, index, a, ibase;
hwaddr pdir_ptr, index, ibase;
hwaddr addr_mask = 0xfff; /* 4k translation */
uint64_t entry;
#define IOVP_SHIFT 12 /* equals PAGE_SHIFT */
#define PDIR_INDEX(iovp) ((iovp) >> IOVP_SHIFT)
#define IOVP_MASK PAGE_MASK
#define SBA_PDIR_VALID_BIT 0x8000000000000000ULL
addr &= ~addr_mask;
/*
* Default translation: "32-bit PCI Addressing on 40-bit Runway".
* For addresses in the 32-bit memory address range ... and then
* language which not-coincidentally matches the PSW.W=0 mapping.
*/
if (addr <= UINT32_MAX) {
entry = hppa_abs_to_phys_pa2_w0(addr);
} else {
entry = addr;
}
/* "range enable" flag cleared? */
if ((s->tlb_ibase & 1) == 0) {
make_iommu_tlbe(addr, addr, addr_mask, &ret);
return ret;
goto skip;
}
a = addr;
ibase = s->tlb_ibase & ~1ULL;
if ((a & s->tlb_imask) != ibase) {
if ((addr & s->tlb_imask) != ibase) {
/* do not translate this one! */
make_iommu_tlbe(addr, addr, addr_mask, &ret);
return ret;
goto skip;
}
index = PDIR_INDEX(a);
index = PDIR_INDEX(addr);
pdir_ptr = s->tlb_pdir_base + index * sizeof(entry);
entry = ldq_le_phys(&address_space_memory, pdir_ptr);
if (!(entry & SBA_PDIR_VALID_BIT)) { /* I/O PDIR entry valid ? */
g_assert_not_reached();
goto failure;
/* failure */
return (IOMMUTLBEntry) { .perm = IOMMU_NONE };
}
entry &= ~SBA_PDIR_VALID_BIT;
entry >>= IOVP_SHIFT;
entry <<= 12;
entry |= addr & 0xfff;
make_iommu_tlbe(addr, entry, addr_mask, &ret);
goto success;
failure:
ret = (IOMMUTLBEntry) { .perm = IOMMU_NONE };
success:
return ret;
skip:
return (IOMMUTLBEntry) {
.target_as = &address_space_memory,
.iova = addr,
.translated_addr = entry,
.addr_mask = addr_mask,
.perm = IOMMU_RW,
};
}
static AddressSpace *elroy_pcihost_set_iommu(PCIBus *bus, void *opaque,

View File

@@ -29,7 +29,7 @@ pci_ss.add(when: 'CONFIG_MV64361', if_true: files('mv64361.c'))
pci_ss.add(when: 'CONFIG_VERSATILE_PCI', if_true: files('versatile.c'))
# HPPA devices
pci_ss.add(when: 'CONFIG_ASTRO', if_true: files('astro.c'))
specific_ss.add(when: 'CONFIG_ASTRO', if_true: files('astro.c'))
pci_ss.add(when: 'CONFIG_DINO', if_true: files('dino.c'))
system_ss.add_all(when: 'CONFIG_PCI', if_true: pci_ss)

View File

@@ -179,6 +179,10 @@ static Property q35_host_props[] = {
mch.below_4g_mem_size, 0),
DEFINE_PROP_SIZE(PCI_HOST_ABOVE_4G_MEM_SIZE, Q35PCIHost,
mch.above_4g_mem_size, 0),
DEFINE_PROP_BOOL(PCI_HOST_PROP_SMM_RANGES, Q35PCIHost,
mch.has_smm_ranges, true),
DEFINE_PROP_BOOL(PCI_HOST_PROP_PAM_MEMORY_AREA, Q35PCIHost,
mch.has_pam_memory_area, true),
DEFINE_PROP_BOOL("x-pci-hole64-fix", Q35PCIHost, pci_hole64_fix, true),
DEFINE_PROP_END_OF_LIST(),
};
@@ -214,6 +218,7 @@ static void q35_host_initfn(Object *obj)
/* mch's object_initialize resets the default value, set it again */
qdev_prop_set_uint64(DEVICE(s), PCI_HOST_PROP_PCI_HOLE64_SIZE,
Q35_PCI_HOST_HOLE64_SIZE_DEFAULT);
object_property_add(obj, PCI_HOST_PROP_PCI_HOLE_START, "uint32",
q35_host_get_pci_hole_start,
NULL, NULL, NULL);
@@ -466,9 +471,11 @@ static void mch_write_config(PCIDevice *d,
pci_default_write_config(d, address, val, len);
if (ranges_overlap(address, len, MCH_HOST_BRIDGE_PAM0,
MCH_HOST_BRIDGE_PAM_SIZE)) {
mch_update_pam(mch);
if (mch->has_pam_memory_area) {
if (ranges_overlap(address, len, MCH_HOST_BRIDGE_PAM0,
MCH_HOST_BRIDGE_PAM_SIZE)) {
mch_update_pam(mch);
}
}
if (ranges_overlap(address, len, MCH_HOST_BRIDGE_PCIEXBAR,
@@ -476,6 +483,10 @@ static void mch_write_config(PCIDevice *d,
mch_update_pciexbar(mch);
}
if (!mch->has_smm_ranges) {
return;
}
if (ranges_overlap(address, len, MCH_HOST_BRIDGE_SMRAM,
MCH_HOST_BRIDGE_SMRAM_SIZE)) {
mch_update_smram(mch);
@@ -494,10 +505,15 @@ static void mch_write_config(PCIDevice *d,
static void mch_update(MCHPCIState *mch)
{
mch_update_pciexbar(mch);
mch_update_pam(mch);
mch_update_smram(mch);
mch_update_ext_tseg_mbytes(mch);
mch_update_smbase_smram(mch);
if (mch->has_pam_memory_area) {
mch_update_pam(mch);
}
if (mch->has_smm_ranges) {
mch_update_smram(mch);
mch_update_ext_tseg_mbytes(mch);
mch_update_smbase_smram(mch);
}
/*
* pci hole goes from end-of-low-ram to io-apic.
@@ -534,22 +550,33 @@ static void mch_reset(DeviceState *qdev)
{
PCIDevice *d = PCI_DEVICE(qdev);
MCHPCIState *mch = MCH_PCI_DEVICE(d);
int i;
pci_set_quad(d->config + MCH_HOST_BRIDGE_PCIEXBAR,
MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT);
d->config[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_DEFAULT;
d->config[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_DEFAULT;
d->wmask[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_WMASK;
d->wmask[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_WMASK;
if (mch->ext_tseg_mbytes > 0) {
pci_set_word(d->config + MCH_HOST_BRIDGE_EXT_TSEG_MBYTES,
MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY);
if (!mch->txt_locked) {
pci_set_quad(d->config + MCH_HOST_BRIDGE_PCIEXBAR,
MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT);
}
d->config[MCH_HOST_BRIDGE_F_SMBASE] = 0;
d->wmask[MCH_HOST_BRIDGE_F_SMBASE] = 0xff;
if (mch->has_pam_memory_area) {
for (i = 0; i < MCH_HOST_BRIDGE_PAM_NB; i++) {
pci_set_byte(d->config + MCH_HOST_BRIDGE_PAM0 + i, 0);
}
}
if (mch->has_smm_ranges) {
d->config[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_DEFAULT;
d->config[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_DEFAULT;
d->wmask[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_WMASK;
d->wmask[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_WMASK;
if (mch->ext_tseg_mbytes > 0) {
pci_set_word(d->config + MCH_HOST_BRIDGE_EXT_TSEG_MBYTES,
MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY);
}
d->config[MCH_HOST_BRIDGE_F_SMBASE] = 0;
d->wmask[MCH_HOST_BRIDGE_F_SMBASE] = 0xff;
}
mch_update(mch);
}
@@ -564,10 +591,39 @@ static void mch_realize(PCIDevice *d, Error **errp)
mch->ext_tseg_mbytes);
return;
}
if (mch->txt_locked) {
pci_set_quad(d->config + MCH_HOST_BRIDGE_PCIEXBAR,
MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT_TXT_LOCKED);
pci_set_quad(d->wmask + MCH_HOST_BRIDGE_PCIEXBAR, 0);
for (i = 0; i < MCH_HOST_BRIDGE_PAM_NB; i++) {
pci_set_byte(d->config + MCH_HOST_BRIDGE_PAM0 + i, 0);
pci_set_byte(d->wmask + MCH_HOST_BRIDGE_PAM0 + i, 0);
}
}
/* setup pci memory mapping */
pc_pci_as_mapping_init(mch->system_memory, mch->pci_address_space);
/* PAM */
if (mch->has_pam_memory_area) {
init_pam(&mch->pam_regions[0], OBJECT(mch), mch->ram_memory,
mch->system_memory, mch->pci_address_space,
PAM_BIOS_BASE, PAM_BIOS_SIZE);
for (i = 0; i < ARRAY_SIZE(mch->pam_regions) - 1; ++i) {
init_pam(&mch->pam_regions[i + 1], OBJECT(mch), mch->ram_memory,
mch->system_memory, mch->pci_address_space,
PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE);
}
} else {
/* disallow configuring PAM */
for (i = 0; i < MCH_HOST_BRIDGE_PAM_NB; i++) {
pci_set_byte(d->wmask + MCH_HOST_BRIDGE_PAM0 + i, 0);
}
}
if (!mch->has_smm_ranges) {
return;
}
/* if *disabled* show SMRAM to all CPUs */
memory_region_init_alias(&mch->smram_region, OBJECT(mch), "smram-region",
mch->pci_address_space, MCH_HOST_BRIDGE_SMRAM_C_BASE,
@@ -634,15 +690,6 @@ static void mch_realize(PCIDevice *d, Error **errp)
object_property_add_const_link(qdev_get_machine(), "smram",
OBJECT(&mch->smram));
init_pam(&mch->pam_regions[0], OBJECT(mch), mch->ram_memory,
mch->system_memory, mch->pci_address_space,
PAM_BIOS_BASE, PAM_BIOS_SIZE);
for (i = 0; i < ARRAY_SIZE(mch->pam_regions) - 1; ++i) {
init_pam(&mch->pam_regions[i + 1], OBJECT(mch), mch->ram_memory,
mch->system_memory, mch->pci_address_space,
PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE);
}
}
uint64_t mch_mcfg_base(void)
@@ -659,6 +706,7 @@ static Property mch_props[] = {
DEFINE_PROP_UINT16("extended-tseg-mbytes", MCHPCIState, ext_tseg_mbytes,
16),
DEFINE_PROP_BOOL("smbase-smram", MCHPCIState, has_smram_at_smbase, true),
DEFINE_PROP_BOOL("txt-locked", MCHPCIState, txt_locked, false),
DEFINE_PROP_END_OF_LIST(),
};


@@ -66,6 +66,10 @@ S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s,
assert(vpdev);
if (!vpdev->vbasedev.group) {
return NULL;
}
id = vpdev->vbasedev.group->container->fd;
if (!s390_pci_update_dma_avail(id, &avail)) {
@@ -132,7 +136,7 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev,
* to the guest based upon the vfio DMA limit.
*/
vfio_size = pbdev->iommu->max_dma_limit << TARGET_PAGE_BITS;
if (vfio_size < (cap->end_dma - cap->start_dma + 1)) {
if (vfio_size > 0 && vfio_size < cap->end_dma - cap->start_dma + 1) {
pbdev->zpci_fn.edma = cap->start_dma + vfio_size - 1;
}
}


@@ -250,6 +250,7 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section)
return (!memory_region_is_ram(section->mr) &&
!memory_region_is_iommu(section->mr)) ||
memory_region_is_protected(section->mr) ||
memory_region_has_guest_memfd(section->mr) ||
/*
* Sizing an enabled 64-bit BAR can cause spurious mappings to
* addresses in the upper part of the 64-bit address space. These
@@ -345,28 +346,25 @@ out:
rcu_read_unlock();
}
static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl,
MemoryRegionSection *section)
static void vfio_notify_discard_generic(VFIOContainer *container,
MemoryRegionSection *section)
{
VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
listener);
const hwaddr size = int128_get64(section->size);
const hwaddr iova = section->offset_within_address_space;
int ret;
/* Unmap with a single call. */
ret = vfio_dma_unmap(vrdl->container, iova, size , NULL);
ret = vfio_dma_unmap(container, iova, size, NULL);
if (ret) {
error_report("%s: vfio_dma_unmap() failed: %s", __func__,
strerror(-ret));
}
}
static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
MemoryRegionSection *section)
static int vfio_notify_populate_generic(VFIOContainer *container,
MemoryRegionSection *section,
uint64_t granularity)
{
VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
listener);
const hwaddr end = section->offset_within_region +
int128_get64(section->size);
hwaddr start, next, iova;
@@ -378,24 +376,43 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
* unmap in minimum granularity later.
*/
for (start = section->offset_within_region; start < end; start = next) {
next = ROUND_UP(start + 1, vrdl->granularity);
next = ROUND_UP(start + 1, granularity);
next = MIN(next, end);
iova = start - section->offset_within_region +
section->offset_within_address_space;
vaddr = memory_region_get_ram_ptr(section->mr) + start;
ret = vfio_dma_map(vrdl->container, iova, next - start,
ret = vfio_dma_map(container, iova, next - start,
vaddr, section->readonly);
if (ret) {
/* Rollback */
vfio_ram_discard_notify_discard(rdl, section);
vfio_notify_discard_generic(container, section);
return ret;
}
}
return 0;
}
static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl,
MemoryRegionSection *section)
{
VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
listener);
vfio_notify_discard_generic(vrdl->container, section);
}
static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
MemoryRegionSection *section)
{
VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
listener);
return vfio_notify_populate_generic(vrdl->container, section, vrdl->granularity);
}
static void vfio_register_ram_discard_listener(VFIOContainer *container,
MemoryRegionSection *section)
{
@@ -1339,6 +1356,18 @@ static void vfio_listener_log_sync(MemoryListener *listener,
}
}
static void vfio_listener_convert_mem_attr(MemoryListener *listener,
MemoryRegionSection *section,
bool shared)
{
VFIOContainer *container = container_of(listener, VFIOContainer, listener);
if (shared) {
vfio_notify_populate_generic(container, section,
1ULL << ctz64(container->pgsizes));
} else {
vfio_notify_discard_generic(container, section);
}
}
const MemoryListener vfio_memory_listener = {
.name = "vfio",
.region_add = vfio_listener_region_add,
@@ -1346,6 +1375,7 @@ const MemoryListener vfio_memory_listener = {
.log_global_start = vfio_listener_log_global_start,
.log_global_stop = vfio_listener_log_global_stop,
.log_sync = vfio_listener_log_sync,
.convert_mem_attr = vfio_listener_convert_mem_attr,
};
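
For illustration, a minimal sketch (not part of this series) of how another component could subscribe to the same notification path. my_convert_mem_attr and "my-component" are hypothetical names; MemoryListener, memory_listener_register() and address_space_memory are existing QEMU APIs:

    /* Hypothetical listener: invoked whenever a guest_memfd-backed section
     * flips between private and shared. */
    static void my_convert_mem_attr(MemoryListener *listener,
                                    MemoryRegionSection *section, bool shared)
    {
        if (shared) {
            /* private -> shared: re-establish host-side mappings */
        } else {
            /* shared -> private: tear down host-side mappings */
        }
    }

    static MemoryListener my_listener = {
        .name = "my-component",
        .convert_mem_attr = my_convert_mem_attr,
    };

    /* e.g. at device realize time: */
    memory_listener_register(&my_listener, &address_space_memory);
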
void vfio_reset_handler(void *opaque)


@@ -525,9 +525,7 @@ static void virtio_mem_activate_memslots_to_plug(VirtIOMEM *vmem,
vmem->memslot_size;
unsigned int idx;
if (!vmem->dynamic_memslots) {
return;
}
assert(vmem->dynamic_memslots);
/* Activate all involved memslots in a single transaction. */
memory_region_transaction_begin();
@@ -547,9 +545,7 @@ static void virtio_mem_deactivate_unplugged_memslots(VirtIOMEM *vmem,
vmem->memslot_size;
unsigned int idx;
if (!vmem->dynamic_memslots) {
return;
}
assert(vmem->dynamic_memslots);
/* Deactivate all memslots with unplugged blocks in a single transaction. */
memory_region_transaction_begin();
@@ -598,7 +594,9 @@ static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa,
virtio_mem_notify_unplug(vmem, offset, size);
virtio_mem_set_range_unplugged(vmem, start_gpa, size);
/* Deactivate completely unplugged memslots after updating the state. */
virtio_mem_deactivate_unplugged_memslots(vmem, offset, size);
if (vmem->dynamic_memslots) {
virtio_mem_deactivate_unplugged_memslots(vmem, offset, size);
}
return 0;
}
@@ -635,9 +633,11 @@ static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa,
* blocks we are plugging here. The following notification will inform
* registered listeners about the blocks we're plugging.
*/
virtio_mem_activate_memslots_to_plug(vmem, offset, size);
if (vmem->dynamic_memslots) {
virtio_mem_activate_memslots_to_plug(vmem, offset, size);
}
ret = virtio_mem_notify_plug(vmem, offset, size);
if (ret) {
if (ret && vmem->dynamic_memslots) {
virtio_mem_deactivate_unplugged_memslots(vmem, offset, size);
}
}
@@ -749,7 +749,9 @@ static int virtio_mem_unplug_all(VirtIOMEM *vmem)
notifier_list_notify(&vmem->size_change_notifiers, &vmem->size);
/* Deactivate all memslots after updating the state. */
virtio_mem_deactivate_unplugged_memslots(vmem, 0, region_size);
if (vmem->dynamic_memslots) {
virtio_mem_deactivate_unplugged_memslots(vmem, 0, region_size);
}
}
trace_virtio_mem_unplugged_all();


@@ -175,6 +175,8 @@ typedef int (RAMBlockIterFunc)(RAMBlock *rb, void *opaque);
int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque);
int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length);
int ram_block_convert_range(RAMBlock *rb, uint64_t start, size_t length,
bool shared_to_private);
#endif


@@ -243,6 +243,12 @@ typedef struct IOMMUTLBEvent {
/* RAM FD is opened read-only */
#define RAM_READONLY_FD (1 << 11)
/* RAM that can be private, with a KVM guest_memfd (gmem) backend */
#define RAM_GUEST_MEMFD (1 << 12)
/* RAM is default private */
#define RAM_DEFAULT_PRIVATE (1 << 13)
static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn,
IOMMUNotifierFlag flags,
hwaddr start, hwaddr end,
@@ -844,6 +850,7 @@ struct IOMMUMemoryRegion {
#define MEMORY_LISTENER_PRIORITY_MIN 0
#define MEMORY_LISTENER_PRIORITY_ACCEL 10
#define MEMORY_LISTENER_PRIORITY_DEV_BACKEND 10
#define MEMORY_LISTENER_PRIORITY_ACCEL_HIGH 20
/**
* struct MemoryListener: callbacks structure for updates to the physical memory map
@@ -1083,6 +1090,19 @@ struct MemoryListener {
*/
void (*coalesced_io_del)(MemoryListener *listener, MemoryRegionSection *section,
hwaddr addr, hwaddr len);
/**
* @convert_mem_attr:
*
* Called during the memory attribute conversion.
*
* @listener: The #MemoryListener
* @section: The MemoryRegionSection
* @shared: convert memory attribute from private to shared
*/
void (*convert_mem_attr)(MemoryListener *listener, MemoryRegionSection *section,
bool shared);
/**
* @priority:
*
@@ -1583,6 +1603,12 @@ void memory_region_init_ram(MemoryRegion *mr,
uint64_t size,
Error **errp);
void memory_region_init_ram_guest_memfd(MemoryRegion *mr,
Object *owner,
const char *name,
uint64_t size,
Error **errp);
/**
* memory_region_init_rom: Initialize a ROM memory region.
*
@@ -1702,6 +1728,19 @@ static inline bool memory_region_is_romd(MemoryRegion *mr)
*/
bool memory_region_is_protected(MemoryRegion *mr);
/**
* memory_region_has_guest_memfd: check whether a memory region has guest_memfd
* associated
*
* Returns %true if a memory region's ram_block has a valid guest_memfd assigned.
*
* @mr: the memory region being queried
*/
bool memory_region_has_guest_memfd(MemoryRegion *mr);
void memory_region_set_default_private(MemoryRegion *mr);
bool memory_region_is_default_private(MemoryRegion *mr);
/**
* memory_region_get_iommu: check whether a memory region is an iommu
*
@@ -2492,6 +2531,14 @@ void memory_region_set_ram_discard_manager(MemoryRegion *mr,
MemoryRegionSection memory_region_find(MemoryRegion *mr,
hwaddr addr, uint64_t size);
/**
* memory_region_convert_mem_attr: convert the memory attribute
* @section: the #MemoryRegionSection to be converted
* @shared: if true, convert attribute from private to shared;
* if false, convert from shared to private
*/
void memory_region_convert_mem_attr(MemoryRegionSection *section, bool shared);
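
A hedged sketch of the caller side, under the assumption that the converted GPA range has already had its KVM attribute updated; memory_region_find(), memory_region_unref() and get_system_memory() are existing QEMU helpers, start/size are placeholders:

    MemoryRegionSection section = memory_region_find(get_system_memory(),
                                                     start, size);
    if (section.mr) {
        /* notify VFIO and any other registered convert_mem_attr listeners */
        memory_region_convert_mem_attr(&section, /*shared=*/true);
        memory_region_unref(section.mr);
    }
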
/**
* memory_global_dirty_log_sync: synchronize the dirty log for all memory
*


@@ -41,6 +41,7 @@ struct RAMBlock {
QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers;
int fd;
uint64_t fd_offset;
int guest_memfd;
size_t page_size;
/* dirty bitmap used during migration */
unsigned long *bmap;


@@ -30,6 +30,7 @@ bool machine_usb(MachineState *machine);
int machine_phandle_start(MachineState *machine);
bool machine_dump_guest_core(MachineState *machine);
bool machine_mem_merge(MachineState *machine);
bool machine_require_guest_memfd(MachineState *machine);
HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine);
void machine_set_cpu_numa_node(MachineState *machine,
const CpuInstanceProperties *props,
@@ -364,6 +365,7 @@ struct MachineState {
char *dt_compatible;
bool dump_guest_core;
bool mem_merge;
bool require_guest_memfd;
bool usb;
bool usb_disabled;
char *firmware;


@@ -17,6 +17,7 @@ void apic_init_reset(DeviceState *s);
void apic_sipi(DeviceState *s);
void apic_poll_irq(DeviceState *d);
void apic_designate_bsp(DeviceState *d, bool bsp);
void apic_force_x2apic(DeviceState *d);
int apic_get_highest_priority_irr(DeviceState *dev);
/* pc.c */


@@ -187,6 +187,7 @@ struct APICCommonState {
DeviceState *vapic;
hwaddr vapic_paddr; /* note: persistence via kvmvapic */
bool legacy_instance_id;
bool force_x2apic;
};
typedef struct VAPICState {


@@ -165,6 +165,8 @@ void pc_guest_info_init(PCMachineState *pcms);
#define PCI_HOST_PROP_PCI_HOLE64_SIZE "pci-hole64-size"
#define PCI_HOST_BELOW_4G_MEM_SIZE "below-4g-mem-size"
#define PCI_HOST_ABOVE_4G_MEM_SIZE "above-4g-mem-size"
#define PCI_HOST_PROP_SMM_RANGES "smm-ranges"
#define PCI_HOST_PROP_PAM_MEMORY_AREA "pam-memory-area"
void pc_pci_as_mapping_init(MemoryRegion *system_memory,
@@ -309,15 +311,12 @@ extern const size_t pc_compat_1_5_len;
extern GlobalProperty pc_compat_1_4[];
extern const size_t pc_compat_1_4_len;
int pc_machine_kvm_type(MachineState *machine, const char *vm_type);
#define DEFINE_PC_MACHINE(suffix, namestr, initfn, optsfn) \
static void pc_machine_##suffix##_class_init(ObjectClass *oc, void *data) \
{ \
MachineClass *mc = MACHINE_CLASS(oc); \
optsfn(mc); \
mc->init = initfn; \
mc->kvm_type = pc_machine_kvm_type; \
} \
static const TypeInfo pc_machine_type_##suffix = { \
.name = namestr TYPE_MACHINE_SUFFIX, \

include/hw/i386/tdvf.h (new file, 58 lines)

@@ -0,0 +1,58 @@
/*
* SPDX-License-Identifier: GPL-2.0-or-later
* Copyright (c) 2020 Intel Corporation
* Author: Isaku Yamahata <isaku.yamahata at gmail.com>
* <isaku.yamahata at intel.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License along
* with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef HW_I386_TDVF_H
#define HW_I386_TDVF_H
#include "qemu/osdep.h"
#define TDVF_SECTION_TYPE_BFV 0
#define TDVF_SECTION_TYPE_CFV 1
#define TDVF_SECTION_TYPE_TD_HOB 2
#define TDVF_SECTION_TYPE_TEMP_MEM 3
#define TDVF_SECTION_ATTRIBUTES_MR_EXTEND (1U << 0)
#define TDVF_SECTION_ATTRIBUTES_PAGE_AUG (1U << 1)
typedef struct TdxFirmwareEntry {
uint32_t data_offset;
uint32_t data_len;
uint64_t address;
uint64_t size;
uint32_t type;
uint32_t attributes;
void *mem_ptr;
} TdxFirmwareEntry;
typedef struct TdxFirmware {
void *mem_ptr;
uint32_t nr_entries;
TdxFirmwareEntry *entries;
} TdxFirmware;
#define for_each_tdx_fw_entry(fw, e) \
for (e = (fw)->entries; e != (fw)->entries + (fw)->nr_entries; e++)
int tdvf_parse_metadata(TdxFirmware *fw, void *flash_ptr, int size);
#endif /* HW_I386_TDVF_H */
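
As a usage sketch, the iterator above can locate a particular section once tdvf_parse_metadata() has succeeded; fw is assumed to be an already-parsed TdxFirmware:

    TdxFirmwareEntry *entry;

    for_each_tdx_fw_entry(&fw, entry) {
        if (entry->type == TDVF_SECTION_TYPE_TD_HOB) {
            /* entry->address and entry->size give the TD HOB's GPA range */
            break;
        }
    }
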


@@ -41,6 +41,7 @@ struct X86MachineState {
MachineState parent;
/*< public >*/
unsigned int vm_type;
/* Pointers to devices and objects: */
ISADevice *rtc;
@@ -58,6 +59,7 @@ struct X86MachineState {
/* CPU and apic information: */
bool apic_xrupt_override;
bool eoi_intercept_unsupported;
unsigned pci_irq_mask;
unsigned apic_id_limit;
uint16_t boot_cpus;
@@ -67,6 +69,7 @@ struct X86MachineState {
OnOffAuto acpi;
OnOffAuto pit;
OnOffAuto pic;
OnOffAuto pam;
char *oem_id;
char *oem_table_id;
@@ -88,6 +91,7 @@ struct X86MachineState {
#define X86_MACHINE_ACPI "acpi"
#define X86_MACHINE_PIT "pit"
#define X86_MACHINE_PIC "pic"
#define X86_MACHINE_PAM "pam"
#define X86_MACHINE_OEM_ID "x-oem-id"
#define X86_MACHINE_OEM_TABLE_ID "x-oem-table-id"
#define X86_MACHINE_BUS_LOCK_RATELIMIT "bus-lock-ratelimit"
@@ -125,6 +129,7 @@ void x86_load_linux(X86MachineState *x86ms,
bool x86_machine_is_smm_enabled(const X86MachineState *x86ms);
bool x86_machine_is_acpi_enabled(const X86MachineState *x86ms);
bool x86_machine_is_pam_enabled(const X86MachineState *x86ms);
/* Global System Interrupts */


@@ -50,6 +50,9 @@ struct MCHPCIState {
MemoryRegion tseg_blackhole, tseg_window;
MemoryRegion smbase_blackhole, smbase_window;
bool has_smram_at_smbase;
bool has_smm_ranges;
bool has_pam_memory_area;
bool txt_locked;
Range pci_hole;
uint64_t below_4g_mem_size;
uint64_t above_4g_mem_size;
@@ -97,6 +100,7 @@ struct Q35PCIHost {
#define MCH_HOST_BRIDGE_PCIEXBAR 0x60 /* 64bit register */
#define MCH_HOST_BRIDGE_PCIEXBAR_SIZE 8 /* 64bit register */
#define MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT 0xb0000000
#define MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT_TXT_LOCKED 0xb0000001
#define MCH_HOST_BRIDGE_PCIEXBAR_MAX (0x10000000) /* 256M */
#define MCH_HOST_BRIDGE_PCIEXBAR_ADMSK Q35_MASK(64, 35, 28)
#define MCH_HOST_BRIDGE_PCIEXBAR_128ADMSK ((uint64_t)(1 << 26))


@@ -51,6 +51,7 @@ void hmp_announce_self(Monitor *mon, const QDict *qdict);
void hmp_cpu(Monitor *mon, const QDict *qdict);
void hmp_memsave(Monitor *mon, const QDict *qdict);
void hmp_pmemsave(Monitor *mon, const QDict *qdict);
void hmp_pmemclear(Monitor *mon, const QDict *qdict);
void hmp_ringbuf_write(Monitor *mon, const QDict *qdict);
void hmp_ringbuf_read(Monitor *mon, const QDict *qdict);
void hmp_cont(Monitor *mon, const QDict *qdict);


@@ -10,6 +10,7 @@
*/
#if defined(CONFIG_INT128) && !defined(CONFIG_TCG_INTERPRETER)
typedef __int128_t Int128;
typedef __int128_t __attribute__((aligned(16))) Int128Aligned;
static inline Int128 int128_make64(uint64_t a)
{
@@ -224,6 +225,7 @@ static inline Int128 int128_rems(Int128 a, Int128 b)
#else /* !CONFIG_INT128 */
typedef struct Int128 Int128;
typedef struct Int128 __attribute__((aligned(16))) Int128Aligned;
/*
* We guarantee that the in-memory byte representation of an


@@ -0,0 +1,198 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* Author: Isaku Yamahata <isaku.yamahata at gmail.com>
* <isaku.yamahata at intel.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License along
* with this program; if not, see <http://www.gnu.org/licenses/>.
*
*/
#ifndef HW_I386_UEFI_H
#define HW_I386_UEFI_H
/***************************************************************************/
/*
* basic EFI definitions
* supplemented with UEFI Specification Version 2.8 (Errata A)
* released February 2020
*/
/* UEFI integer is little endian */
typedef struct {
uint32_t Data1;
uint16_t Data2;
uint16_t Data3;
uint8_t Data4[8];
} EFI_GUID;
typedef enum {
EfiReservedMemoryType,
EfiLoaderCode,
EfiLoaderData,
EfiBootServicesCode,
EfiBootServicesData,
EfiRuntimeServicesCode,
EfiRuntimeServicesData,
EfiConventionalMemory,
EfiUnusableMemory,
EfiACPIReclaimMemory,
EfiACPIMemoryNVS,
EfiMemoryMappedIO,
EfiMemoryMappedIOPortSpace,
EfiPalCode,
EfiPersistentMemory,
EfiUnacceptedMemoryType,
EfiMaxMemoryType
} EFI_MEMORY_TYPE;
#define EFI_HOB_HANDOFF_TABLE_VERSION 0x0009
#define EFI_HOB_TYPE_HANDOFF 0x0001
#define EFI_HOB_TYPE_MEMORY_ALLOCATION 0x0002
#define EFI_HOB_TYPE_RESOURCE_DESCRIPTOR 0x0003
#define EFI_HOB_TYPE_GUID_EXTENSION 0x0004
#define EFI_HOB_TYPE_FV 0x0005
#define EFI_HOB_TYPE_CPU 0x0006
#define EFI_HOB_TYPE_MEMORY_POOL 0x0007
#define EFI_HOB_TYPE_FV2 0x0009
#define EFI_HOB_TYPE_LOAD_PEIM_UNUSED 0x000A
#define EFI_HOB_TYPE_UEFI_CAPSULE 0x000B
#define EFI_HOB_TYPE_FV3 0x000C
#define EFI_HOB_TYPE_UNUSED 0xFFFE
#define EFI_HOB_TYPE_END_OF_HOB_LIST 0xFFFF
typedef struct {
uint16_t HobType;
uint16_t HobLength;
uint32_t Reserved;
} EFI_HOB_GENERIC_HEADER;
typedef uint64_t EFI_PHYSICAL_ADDRESS;
typedef uint32_t EFI_BOOT_MODE;
typedef struct {
EFI_HOB_GENERIC_HEADER Header;
uint32_t Version;
EFI_BOOT_MODE BootMode;
EFI_PHYSICAL_ADDRESS EfiMemoryTop;
EFI_PHYSICAL_ADDRESS EfiMemoryBottom;
EFI_PHYSICAL_ADDRESS EfiFreeMemoryTop;
EFI_PHYSICAL_ADDRESS EfiFreeMemoryBottom;
EFI_PHYSICAL_ADDRESS EfiEndOfHobList;
} EFI_HOB_HANDOFF_INFO_TABLE;
#define EFI_RESOURCE_SYSTEM_MEMORY 0x00000000
#define EFI_RESOURCE_MEMORY_MAPPED_IO 0x00000001
#define EFI_RESOURCE_IO 0x00000002
#define EFI_RESOURCE_FIRMWARE_DEVICE 0x00000003
#define EFI_RESOURCE_MEMORY_MAPPED_IO_PORT 0x00000004
#define EFI_RESOURCE_MEMORY_RESERVED 0x00000005
#define EFI_RESOURCE_IO_RESERVED 0x00000006
#define EFI_RESOURCE_MEMORY_UNACCEPTED 0x00000007
#define EFI_RESOURCE_MAX_MEMORY_TYPE 0x00000008
#define EFI_RESOURCE_ATTRIBUTE_PRESENT 0x00000001
#define EFI_RESOURCE_ATTRIBUTE_INITIALIZED 0x00000002
#define EFI_RESOURCE_ATTRIBUTE_TESTED 0x00000004
#define EFI_RESOURCE_ATTRIBUTE_SINGLE_BIT_ECC 0x00000008
#define EFI_RESOURCE_ATTRIBUTE_MULTIPLE_BIT_ECC 0x00000010
#define EFI_RESOURCE_ATTRIBUTE_ECC_RESERVED_1 0x00000020
#define EFI_RESOURCE_ATTRIBUTE_ECC_RESERVED_2 0x00000040
#define EFI_RESOURCE_ATTRIBUTE_READ_PROTECTED 0x00000080
#define EFI_RESOURCE_ATTRIBUTE_WRITE_PROTECTED 0x00000100
#define EFI_RESOURCE_ATTRIBUTE_EXECUTION_PROTECTED 0x00000200
#define EFI_RESOURCE_ATTRIBUTE_UNCACHEABLE 0x00000400
#define EFI_RESOURCE_ATTRIBUTE_WRITE_COMBINEABLE 0x00000800
#define EFI_RESOURCE_ATTRIBUTE_WRITE_THROUGH_CACHEABLE 0x00001000
#define EFI_RESOURCE_ATTRIBUTE_WRITE_BACK_CACHEABLE 0x00002000
#define EFI_RESOURCE_ATTRIBUTE_16_BIT_IO 0x00004000
#define EFI_RESOURCE_ATTRIBUTE_32_BIT_IO 0x00008000
#define EFI_RESOURCE_ATTRIBUTE_64_BIT_IO 0x00010000
#define EFI_RESOURCE_ATTRIBUTE_UNCACHED_EXPORTED 0x00020000
#define EFI_RESOURCE_ATTRIBUTE_READ_ONLY_PROTECTED 0x00040000
#define EFI_RESOURCE_ATTRIBUTE_READ_ONLY_PROTECTABLE 0x00080000
#define EFI_RESOURCE_ATTRIBUTE_READ_PROTECTABLE 0x00100000
#define EFI_RESOURCE_ATTRIBUTE_WRITE_PROTECTABLE 0x00200000
#define EFI_RESOURCE_ATTRIBUTE_EXECUTION_PROTECTABLE 0x00400000
#define EFI_RESOURCE_ATTRIBUTE_PERSISTENT 0x00800000
#define EFI_RESOURCE_ATTRIBUTE_PERSISTABLE 0x01000000
#define EFI_RESOURCE_ATTRIBUTE_MORE_RELIABLE 0x02000000
typedef uint32_t EFI_RESOURCE_TYPE;
typedef uint32_t EFI_RESOURCE_ATTRIBUTE_TYPE;
typedef struct {
EFI_HOB_GENERIC_HEADER Header;
EFI_GUID Owner;
EFI_RESOURCE_TYPE ResourceType;
EFI_RESOURCE_ATTRIBUTE_TYPE ResourceAttribute;
EFI_PHYSICAL_ADDRESS PhysicalStart;
uint64_t ResourceLength;
} EFI_HOB_RESOURCE_DESCRIPTOR;
typedef struct {
EFI_HOB_GENERIC_HEADER Header;
EFI_GUID Name;
/* guid specific data follows */
} EFI_HOB_GUID_TYPE;
typedef struct {
EFI_HOB_GENERIC_HEADER Header;
EFI_PHYSICAL_ADDRESS BaseAddress;
uint64_t Length;
} EFI_HOB_FIRMWARE_VOLUME;
typedef struct {
EFI_HOB_GENERIC_HEADER Header;
EFI_PHYSICAL_ADDRESS BaseAddress;
uint64_t Length;
EFI_GUID FvName;
EFI_GUID FileName;
} EFI_HOB_FIRMWARE_VOLUME2;
typedef struct {
EFI_HOB_GENERIC_HEADER Header;
EFI_PHYSICAL_ADDRESS BaseAddress;
uint64_t Length;
uint32_t AuthenticationStatus;
bool ExtractedFv;
EFI_GUID FvName;
EFI_GUID FileName;
} EFI_HOB_FIRMWARE_VOLUME3;
typedef struct {
EFI_HOB_GENERIC_HEADER Header;
uint8_t SizeOfMemorySpace;
uint8_t SizeOfIoSpace;
uint8_t Reserved[6];
} EFI_HOB_CPU;
typedef struct {
EFI_HOB_GENERIC_HEADER Header;
} EFI_HOB_MEMORY_POOL;
typedef struct {
EFI_HOB_GENERIC_HEADER Header;
EFI_PHYSICAL_ADDRESS BaseAddress;
uint64_t Length;
} EFI_HOB_UEFI_CAPSULE;
#define EFI_HOB_OWNER_ZERO \
((EFI_GUID){ 0x00000000, 0x0000, 0x0000, \
{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } })
#endif
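
Purely as an illustration of the definitions above, a sketch of filling a resource-descriptor HOB for a range of system memory; the base and length values are placeholders:

    EFI_HOB_RESOURCE_DESCRIPTOR hob = {
        .Header = {
            .HobType = EFI_HOB_TYPE_RESOURCE_DESCRIPTOR,
            .HobLength = sizeof(EFI_HOB_RESOURCE_DESCRIPTOR),
        },
        .Owner = EFI_HOB_OWNER_ZERO,
        .ResourceType = EFI_RESOURCE_SYSTEM_MEMORY,
        .ResourceAttribute = EFI_RESOURCE_ATTRIBUTE_PRESENT |
                             EFI_RESOURCE_ATTRIBUTE_INITIALIZED |
                             EFI_RESOURCE_ATTRIBUTE_TESTED,
        .PhysicalStart = 0x80000000,  /* placeholder base */
        .ResourceLength = 0x10000000, /* placeholder length */
    };
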


@@ -24,6 +24,7 @@ typedef struct ArchDumpInfo {
void (*arch_sections_add_fn)(DumpState *s);
uint64_t (*arch_sections_write_hdr_fn)(DumpState *s, uint8_t *buff);
int (*arch_sections_write_fn)(DumpState *s, uint8_t *buff);
void (*arch_cleanup_fn)(DumpState *s);
} ArchDumpInfo;
struct GuestPhysBlockList; /* memory_mapping.h */


@@ -66,6 +66,7 @@ struct HostMemoryBackend {
uint64_t size;
bool merge, dump, use_canonical_path;
bool prealloc, is_mapped, share, reserve;
bool require_guest_memfd;
uint32_t prealloc_threads;
ThreadContext *prealloc_context;
DECLARE_BITMAP(host_nodes, MAX_NODES + 1);


@@ -341,6 +341,7 @@ int kvm_arch_get_default_type(MachineState *ms);
int kvm_arch_init(MachineState *ms, KVMState *s);
int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp);
int kvm_arch_init_vcpu(CPUState *cpu);
int kvm_arch_destroy_vcpu(CPUState *cpu);
@@ -538,4 +539,11 @@ bool kvm_arch_cpu_check_are_resettable(void);
bool kvm_dirty_ring_enabled(void);
uint32_t kvm_dirty_ring_size(void);
int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp);
int kvm_set_memory_attributes_private(hwaddr start, hwaddr size);
int kvm_set_memory_attributes_shared(hwaddr start, hwaddr size);
int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private);
#endif


@@ -30,6 +30,8 @@ typedef struct KVMSlot
int as_id;
/* Cache of the offset in ram address space */
ram_addr_t ram_start_offset;
int guest_memfd;
hwaddr guest_memfd_offset;
} KVMSlot;
typedef struct KVMMemoryUpdate {


@@ -560,4 +560,98 @@ struct kvm_pmu_event_filter {
/* x86-specific KVM_EXIT_HYPERCALL flags. */
#define KVM_EXIT_HYPERCALL_LONG_MODE BIT(0)
#define KVM_X86_DEFAULT_VM 0
#define KVM_X86_SW_PROTECTED_VM 1
#define KVM_X86_TDX_VM 2
#define KVM_X86_SNP_VM 3
/* Trust Domain eXtension sub-ioctl() commands. */
enum kvm_tdx_cmd_id {
KVM_TDX_CAPABILITIES = 0,
KVM_TDX_INIT_VM,
KVM_TDX_INIT_VCPU,
KVM_TDX_INIT_MEM_REGION,
KVM_TDX_FINALIZE_VM,
KVM_TDX_RELEASE_VM,
KVM_TDX_CMD_NR_MAX,
};
struct kvm_tdx_cmd {
/* enum kvm_tdx_cmd_id */
__u32 id;
/* flags for sub-command. If sub-command doesn't use this, set zero. */
__u32 flags;
/*
* data for each sub-command. An immediate or a pointer to the actual
* data in process virtual address. If sub-command doesn't use it,
* set zero.
*/
__u64 data;
/*
* Auxiliary error code. The sub-command may return TDX SEAMCALL
* status code in addition to -Exxx.
* Defined for consistency with struct kvm_sev_cmd.
*/
__u64 error;
};
struct kvm_tdx_cpuid_config {
__u32 leaf;
__u32 sub_leaf;
__u32 eax;
__u32 ebx;
__u32 ecx;
__u32 edx;
};
struct kvm_tdx_capabilities {
__u64 attrs_fixed0;
__u64 attrs_fixed1;
__u64 xfam_fixed0;
__u64 xfam_fixed1;
#define TDX_CAP_GPAW_48 (1 << 0)
#define TDX_CAP_GPAW_52 (1 << 1)
__u32 supported_gpaw;
__u32 padding;
__u64 reserved[251];
__u32 nr_cpuid_configs;
struct kvm_tdx_cpuid_config cpuid_configs[];
};
struct kvm_tdx_init_vm {
__u64 attributes;
__u64 mrconfigid[6]; /* sha384 digest */
__u64 mrowner[6]; /* sha384 digest */
__u64 mrownerconfig[6]; /* sha384 digest */
/*
* For future extensibility to make sizeof(struct kvm_tdx_init_vm) = 8KB.
* This should be enough given sizeof(TD_PARAMS) = 1024.
* 8KB was chosen because
* sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES(=256) = 8KB.
*/
__u64 reserved[1004];
/*
* Call KVM_TDX_INIT_VM before vcpu creation, thus before
* KVM_SET_CPUID2.
* This configuration supersedes KVM_SET_CPUID2s for VCPUs because the
* TDX module directly virtualizes those CPUIDs without the VMM. The user
* space VMM, e.g. qemu, should make KVM_SET_CPUID2 consistent with
* those values. If it doesn't, KVM may have a wrong idea of the guest's
* CPUIDs, and KVM may wrongly emulate CPUIDs or MSRs that the TDX
* module doesn't virtualize.
*/
struct kvm_cpuid2 cpuid;
};
#define KVM_TDX_MEASURE_MEMORY_REGION (1UL << 0)
struct kvm_tdx_init_mem_region {
__u64 source_addr;
__u64 gpa;
__u64 nr_pages;
};
#endif /* _ASM_X86_KVM_H */
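
A hedged sketch of issuing one of the sub-commands above. This assumes the series routes struct kvm_tdx_cmd through KVM's memory-encryption ioctl (KVM_MEMORY_ENCRYPT_OP), as SEV does; vm_fd and nr_configs are placeholders:

    struct kvm_tdx_capabilities *caps;
    struct kvm_tdx_cmd cmd = { .id = KVM_TDX_CAPABILITIES };

    /* space for the flexible cpuid_configs[] array must be allocated up front */
    caps = g_malloc0(sizeof(*caps) +
                     nr_configs * sizeof(struct kvm_tdx_cpuid_config));
    caps->nr_cpuid_configs = nr_configs;
    cmd.data = (__u64)(unsigned long)caps;

    if (ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd) < 0) {
        /* cmd.error may carry a TDX SEAMCALL status code besides -errno */
    }
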


@@ -95,6 +95,19 @@ struct kvm_userspace_memory_region {
__u64 userspace_addr; /* start of the userspace allocated memory */
};
/* for KVM_SET_USER_MEMORY_REGION2 */
struct kvm_userspace_memory_region2 {
__u32 slot;
__u32 flags;
__u64 guest_phys_addr;
__u64 memory_size;
__u64 userspace_addr;
__u64 guest_memfd_offset;
__u32 guest_memfd;
__u32 pad1;
__u64 pad2[14];
};
/*
* The bit 0 ~ bit 15 of kvm_userspace_memory_region::flags are visible for
* userspace, other bits are reserved for kvm internal use which are defined
@@ -102,6 +115,7 @@ struct kvm_userspace_memory_region {
*/
#define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0)
#define KVM_MEM_READONLY (1UL << 1)
#define KVM_MEM_PRIVATE (1UL << 2)
/* for KVM_IRQ_LINE */
struct kvm_irq_level {
@@ -223,6 +237,92 @@ struct kvm_xen_exit {
} u;
};
/* masks for reg_mask to indicate which registers are passed. */
#define TDX_VMCALL_REG_MASK_RBX BIT_ULL(2)
#define TDX_VMCALL_REG_MASK_RDX BIT_ULL(3)
#define TDX_VMCALL_REG_MASK_RSI BIT_ULL(6)
#define TDX_VMCALL_REG_MASK_RDI BIT_ULL(7)
#define TDX_VMCALL_REG_MASK_R8 BIT_ULL(8)
#define TDX_VMCALL_REG_MASK_R9 BIT_ULL(9)
#define TDX_VMCALL_REG_MASK_R10 BIT_ULL(10)
#define TDX_VMCALL_REG_MASK_R11 BIT_ULL(11)
#define TDX_VMCALL_REG_MASK_R12 BIT_ULL(12)
#define TDX_VMCALL_REG_MASK_R13 BIT_ULL(13)
#define TDX_VMCALL_REG_MASK_R14 BIT_ULL(14)
#define TDX_VMCALL_REG_MASK_R15 BIT_ULL(15)
struct kvm_tdx_exit {
#define KVM_EXIT_TDX_VMCALL 1
__u32 type;
__u32 pad;
union {
struct kvm_tdx_vmcall {
/*
* RAX(bit 0), RCX(bit 1) and RSP(bit 4) are reserved.
* RAX(bit 0): TDG.VP.VMCALL status code.
* RCX(bit 1): bitmap for used registers.
* RSP(bit 4): the caller stack.
*/
union {
__u64 in_rcx;
__u64 reg_mask;
};
/*
* Guest-Host-Communication Interface for TDX spec
* defines the ABI for TDG.VP.VMCALL.
*/
/* Input parameters: guest -> VMM */
union {
__u64 in_r10;
__u64 type;
};
union {
__u64 in_r11;
__u64 subfunction;
};
/*
* Subfunction specific.
* Registers are used in this order to pass input
* arguments. r12=arg0, r13=arg1, etc.
*/
__u64 in_r12;
__u64 in_r13;
__u64 in_r14;
__u64 in_r15;
__u64 in_rbx;
__u64 in_rdi;
__u64 in_rsi;
__u64 in_r8;
__u64 in_r9;
__u64 in_rdx;
/* Output parameters: VMM -> guest */
union {
__u64 out_r10;
__u64 status_code;
};
/*
* Subfunction specific.
* Registers are used in this order to output return
* values. r11=ret0, r12=ret1, etc.
*/
__u64 out_r11;
__u64 out_r12;
__u64 out_r13;
__u64 out_r14;
__u64 out_r15;
__u64 out_rbx;
__u64 out_rdi;
__u64 out_rsi;
__u64 out_r8;
__u64 out_r9;
__u64 out_rdx;
} vmcall;
} u;
};
#define KVM_S390_GET_SKEYS_NONE 1
#define KVM_S390_SKEYS_MAX 1048576
@@ -265,6 +365,9 @@ struct kvm_xen_exit {
#define KVM_EXIT_RISCV_CSR 36
#define KVM_EXIT_NOTIFY 37
#define KVM_EXIT_TDX 50
#define KVM_EXIT_MEMORY_FAULT 100
/* For KVM_EXIT_INTERNAL_ERROR */
/* Emulate instruction failed. */
#define KVM_INTERNAL_ERROR_EMULATION 1
@@ -506,6 +609,15 @@ struct kvm_run {
#define KVM_NOTIFY_CONTEXT_INVALID (1 << 0)
__u32 flags;
} notify;
/* KVM_EXIT_MEMORY_FAULT */
struct {
#define KVM_MEMORY_EXIT_FLAG_PRIVATE (1ULL << 3)
__u64 flags;
__u64 gpa;
__u64 size;
} memory_fault;
/* KVM_EXIT_TDX_VMCALL */
struct kvm_tdx_exit tdx;
/* Fix the size of the union. */
char padding[256];
};
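
A minimal sketch of consuming the new exit inside a VMM's switch on run->exit_reason; kvm_convert_memory() is the QEMU-side helper declared earlier in this series, and run is the mmap'ed struct kvm_run:

    case KVM_EXIT_MEMORY_FAULT: {
        bool to_private = run->memory_fault.flags & KVM_MEMORY_EXIT_FLAG_PRIVATE;
        /* flip the attribute and adjust the backing store, then re-enter */
        ret = kvm_convert_memory(run->memory_fault.gpa,
                                 run->memory_fault.size, to_private);
        break;
    }
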
@@ -1189,6 +1301,13 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE 228
#define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229
/* TODO: remove this workaround to avoid CAP number conflicts with upstream. */
#define KVM_CAP_MEMORY_ATTRIBUTES 500
#define KVM_CAP_USER_MEMORY2 750
#define KVM_CAP_MEMORY_FAULT_INFO 800
#define KVM_CAP_GUEST_MEMFD 900
#define KVM_CAP_VM_TYPES 1000
#ifdef KVM_CAP_IRQ_ROUTING
struct kvm_irq_routing_irqchip {
@@ -1469,6 +1588,8 @@ struct kvm_vfio_spapr_tce {
struct kvm_userspace_memory_region)
#define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47)
#define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64)
#define KVM_SET_USER_MEMORY_REGION2 _IOW(KVMIO, 0x49, \
struct kvm_userspace_memory_region2)
/* enable ucontrol for s390 */
struct kvm_s390_ucas_mapping {
@@ -2252,4 +2373,26 @@ struct kvm_s390_zpci_op {
/* flags for kvm_s390_zpci_op->u.reg_aen.flags */
#define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0)
/* Available with KVM_CAP_MEMORY_ATTRIBUTES */
#define KVM_SET_MEMORY_ATTRIBUTES _IOW(KVMIO, 0xd2, struct kvm_memory_attributes)
struct kvm_memory_attributes {
__u64 address;
__u64 size;
__u64 attributes;
__u64 flags;
};
#define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3)
#define KVM_CREATE_GUEST_MEMFD _IOWR(KVMIO, 0xd4, struct kvm_create_guest_memfd)
struct kvm_create_guest_memfd {
__u64 size;
__u64 flags;
__u64 reserved[6];
};
#define KVM_GUEST_MEMFD_ALLOW_HUGEPAGE (1ULL << 0)
#endif /* __LINUX_KVM_H */
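
Tying the new uAPI together, a hedged sketch of backing a slot with guest_memfd and marking its whole range private; vm_fd, gpa, mem_size and uaddr are placeholders, and error handling is elided:

    struct kvm_create_guest_memfd gmem = { .size = mem_size };
    int gmem_fd = ioctl(vm_fd, KVM_CREATE_GUEST_MEMFD, &gmem);

    struct kvm_userspace_memory_region2 region = {
        .slot = 0,
        .flags = KVM_MEM_PRIVATE,
        .guest_phys_addr = gpa,
        .memory_size = mem_size,
        .userspace_addr = (__u64)(unsigned long)uaddr, /* shared backing */
        .guest_memfd = gmem_fd,                        /* private backing */
    };
    ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION2, &region);

    struct kvm_memory_attributes attrs = {
        .address = gpa,
        .size = mem_size,
        .attributes = KVM_MEMORY_ATTRIBUTE_PRIVATE,
    };
    ioctl(vm_fd, KVM_SET_MEMORY_ATTRIBUTES, &attrs);
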


@@ -462,6 +462,7 @@ warn_flags = [
'-Wno-tautological-type-limit-compare',
'-Wno-psabi',
'-Wno-gnu-variable-sized-type-not-at-end',
'-Wshadow=local',
]
if targetos != 'darwin'



@@ -821,6 +821,31 @@
{ 'command': 'pmemsave',
'data': {'val': 'int', 'size': 'int', 'filename': 'str'} }
##
# @pmemclear:
#
# Clear a portion of guest physical memory to zeros.
#
# @val: the physical address of the guest to start from
#
# @size: the size of memory region to clear
#
# Returns: Nothing on success
#
# Since: 7.1
#
# Example:
#
# -> { "execute": "pclear",
# "arguments": { "val": 10,
# "size": 100 } }
# <- { "return": {} }
#
##
{ 'command': 'pmemclear',
'data': {'val': 'int', 'size': 'int' } }
##
# @Memdev:
#


@@ -878,6 +878,36 @@
'reduced-phys-bits': 'uint32',
'*kernel-hashes': 'bool' } }
##
# @TdxGuestProperties:
#
# Properties for tdx-guest objects.
#
# @sept-ve-disable: toggle bit 28 of TD attributes to control disabling
# of EPT violation conversion to #VE on guest TD access of PENDING
# pages. Some guest OSes (e.g., Linux TD guests) may require this
# to be set; otherwise they refuse to boot.
#
# @mrconfigid: base64 encoded MRCONFIGID SHA384 digest
#
# @mrowner: base64 encoded MROWNER SHA384 digest
#
# @mrownerconfig: base64 encoded MROWNERCONFIG SHA384 digest
#
# @quote-generation-socket: socket address for the Quote Generation Service (QGS)
#
# @debug: Whether it's a debug TD or not (default: 0)
#
# Since: 8.2
##
{ 'struct': 'TdxGuestProperties',
'data': { '*sept-ve-disable': 'bool',
'*mrconfigid': 'str',
'*mrowner': 'str',
'*mrownerconfig': 'str',
'*quote-generation-socket': 'SocketAddress',
'*debug': 'bool' } }
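
A hedged command-line usage example for the new object type, assuming the machine consumes it through the existing confidential-guest-support machine property:

    qemu-system-x86_64 -accel kvm \
        -machine q35,confidential-guest-support=tdx0 \
        -object tdx-guest,id=tdx0,sept-ve-disable=on ...
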
##
# @ThreadContextProperties:
#
@@ -956,6 +986,7 @@
'sev-guest',
'thread-context',
's390-pv-guest',
'tdx-guest',
'throttle-group',
'tls-creds-anon',
'tls-creds-psk',
@@ -1022,6 +1053,7 @@
'secret_keyring': { 'type': 'SecretKeyringProperties',
'if': 'CONFIG_SECRET_KEYRING' },
'sev-guest': 'SevGuestProperties',
'tdx-guest': 'TdxGuestProperties',
'thread-context': 'ThreadContextProperties',
'throttle-group': 'ThrottleGroupProperties',
'tls-creds-anon': 'TlsCredsAnonProperties',


@@ -496,10 +496,12 @@
#
# @s390: s390 guest panic information type (Since: 2.12)
#
# @tdx: tdx guest panic information type (Since: 8.2)
#
# Since: 2.9
##
{ 'enum': 'GuestPanicInformationType',
'data': [ 'hyper-v', 's390' ] }
'data': [ 'hyper-v', 's390', 'tdx' ] }
##
# @GuestPanicInformation:
@@ -514,7 +516,8 @@
'base': {'type': 'GuestPanicInformationType'},
'discriminator': 'type',
'data': {'hyper-v': 'GuestPanicInformationHyperV',
's390': 'GuestPanicInformationS390'}}
's390': 'GuestPanicInformationS390',
'tdx' : 'GuestPanicInformationTdx'}}
##
# @GuestPanicInformationHyperV:
@@ -577,6 +580,26 @@
'psw-addr': 'uint64',
'reason': 'S390CrashReason'}}
##
# @GuestPanicInformationTdx:
#
# TDX GHCI TDG.VP.VMCALL<ReportFatalError> specific guest panic information
#
# @error-code: TD-specific error code
#
# @gpa: 4KB-aligned guest physical address of the page containing
# additional error data
#
# @message: TD guest provided message string. (It's not necessarily
# trustworthy and cannot be assumed to be well formed because it
# comes from the guest)
#
# Since: 8.2
##
{'struct': 'GuestPanicInformationTdx',
'data': {'error-code': 'uint64',
'gpa': 'uint64',
'message': 'str'}}
##
# @MEMORY_FAILURE:
#


@@ -76,7 +76,8 @@ class QAPISchemaEntity:
def __repr__(self):
if self.name is None:
return "<%s at 0x%x>" % (type(self).__name__, id(self))
return "<%s:%s at 0x%x>" % type(self).__name__, self.name, id(self)
return "<%s:%s at 0x%x>" % (type(self).__name__, self.name,
id(self))
def c_name(self):
return c_name(self.name)


@@ -822,6 +822,23 @@ exit:
fclose(f);
}
void qmp_pmemclear(int64_t addr, int64_t size, Error **errp)
{
uint32_t l;
uint8_t buf[1024];
memset(buf, 0, sizeof(buf));
while (size != 0) {
l = sizeof(buf);
if (l > size) {
l = size;
}
cpu_physical_memory_write(addr, buf, l);
addr += l;
size -= l;
}
}
void qmp_inject_nmi(Error **errp)
{
nmi_monitor_handle(monitor_get_cpu_index(monitor_cur()), errp);


@@ -1862,6 +1862,24 @@ bool memory_region_is_protected(MemoryRegion *mr)
return mr->ram && (mr->ram_block->flags & RAM_PROTECTED);
}
bool memory_region_has_guest_memfd(MemoryRegion *mr)
{
return mr->ram_block && mr->ram_block->guest_memfd >= 0;
}
bool memory_region_is_default_private(MemoryRegion *mr)
{
return memory_region_has_guest_memfd(mr) &&
(mr->ram_block->flags & RAM_DEFAULT_PRIVATE);
}
void memory_region_set_default_private(MemoryRegion *mr)
{
if (memory_region_has_guest_memfd(mr)) {
mr->ram_block->flags |= RAM_DEFAULT_PRIVATE;
}
}
uint8_t memory_region_get_dirty_log_mask(MemoryRegion *mr)
{
uint8_t mask = mr->dirty_log_mask;
@@ -3016,6 +3034,21 @@ void memory_global_dirty_log_stop(unsigned int flags)
memory_global_dirty_log_do_stop(flags);
}
void memory_region_convert_mem_attr(MemoryRegionSection *section, bool shared)
{
MemoryListener *listener;
if (!section->mr || !memory_region_has_guest_memfd(section->mr)) {
return;
}
QTAILQ_FOREACH(listener, &memory_listeners, link) {
if (!listener->convert_mem_attr) {
continue;
}
listener->convert_mem_attr(listener, section, shared);
}
}
static void listener_add_address_space(MemoryListener *listener,
AddressSpace *as)
{
@@ -3614,6 +3647,33 @@ void memory_region_init_ram(MemoryRegion *mr,
vmstate_register_ram(mr, owner_dev);
}
void memory_region_init_ram_guest_memfd(MemoryRegion *mr,
Object *owner,
const char *name,
uint64_t size,
Error **errp)
{
DeviceState *owner_dev;
Error *err = NULL;
memory_region_init_ram_flags_nomigrate(mr, owner, name, size,
RAM_GUEST_MEMFD, &err);
if (err) {
error_propagate(errp, err);
return;
}
memory_region_set_default_private(mr);
/* This will assert if owner is neither NULL nor a DeviceState.
* We only want the owner here for the purposes of defining a
* unique name for migration. TODO: Ideally we should implement
* a naming scheme for Objects which are not DeviceStates, in
* which case we can relax this restriction.
*/
owner_dev = DEVICE(owner);
vmstate_register_ram(mr, owner_dev);
}
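
A short usage sketch of the new initializer; the owner may be NULL or a DeviceState, and the region name and size here are placeholders:

    MemoryRegion *mr = g_new0(MemoryRegion, 1);
    Error *local_err = NULL;

    /* creates RAM_GUEST_MEMFD-flagged RAM that starts out default-private */
    memory_region_init_ram_guest_memfd(mr, NULL, "guest-memfd-ram",
                                       64 * MiB, &local_err);
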
void memory_region_init_rom(MemoryRegion *mr,
Object *owner,
const char *name,


@@ -1803,6 +1803,40 @@ static void dirty_memory_extend(ram_addr_t old_ram_size,
}
}
#ifdef CONFIG_KVM
#define HPAGE_PMD_SIZE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
#define DEFAULT_PMD_SIZE (1ul << 21)
static uint32_t get_thp_size(void)
{
gchar *content = NULL;
const char *endptr;
static uint64_t thp_size = 0;
uint64_t tmp;
if (thp_size != 0) {
return thp_size;
}
if (g_file_get_contents(HPAGE_PMD_SIZE_PATH, &content, NULL, NULL) &&
!qemu_strtou64(content, &endptr, 0, &tmp) &&
(!endptr || *endptr == '\n')) {
/* Sanity-check the value and fall back to something reasonable. */
if (!tmp || !is_power_of_2(tmp)) {
warn_report("Read unsupported THP size: %" PRIx64, tmp);
} else {
thp_size = tmp;
}
}
if (!thp_size) {
thp_size = DEFAULT_PMD_SIZE;
}
return thp_size;
}
#endif
static void ram_block_add(RAMBlock *new_block, Error **errp)
{
const bool noreserve = qemu_ram_is_noreserve(new_block);
@@ -1841,6 +1875,20 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
}
}
#ifdef CONFIG_KVM
if (kvm_enabled() && new_block->flags & RAM_GUEST_MEMFD &&
new_block->guest_memfd < 0) {
uint64_t flags = QEMU_IS_ALIGNED(new_block->max_length, get_thp_size()) ?
KVM_GUEST_MEMFD_ALLOW_HUGEPAGE : 0;
new_block->guest_memfd = kvm_create_guest_memfd(new_block->max_length,
flags, errp);
if (new_block->guest_memfd < 0) {
qemu_mutex_unlock_ramlist();
return;
}
}
#endif
new_ram_size = MAX(old_ram_size,
(new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
if (new_ram_size > old_ram_size) {
@@ -1903,7 +1951,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
/* Just support these ram flags by now. */
assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE |
RAM_PROTECTED | RAM_NAMED_FILE | RAM_READONLY |
RAM_READONLY_FD)) == 0);
RAM_READONLY_FD | RAM_GUEST_MEMFD)) == 0);
if (xen_enabled()) {
error_setg(errp, "-mem-path not supported with Xen");
@@ -1938,6 +1986,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
new_block->used_length = size;
new_block->max_length = size;
new_block->flags = ram_flags;
new_block->guest_memfd = -1;
new_block->host = file_ram_alloc(new_block, size, fd, !file_size, offset,
errp);
if (!new_block->host) {
@@ -2016,7 +2065,7 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
Error *local_err = NULL;
assert((ram_flags & ~(RAM_SHARED | RAM_RESIZEABLE | RAM_PREALLOC |
RAM_NORESERVE)) == 0);
RAM_NORESERVE| RAM_GUEST_MEMFD)) == 0);
assert(!host ^ (ram_flags & RAM_PREALLOC));
size = HOST_PAGE_ALIGN(size);
@@ -2028,6 +2077,7 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
new_block->max_length = max_size;
assert(max_size >= size);
new_block->fd = -1;
new_block->guest_memfd = -1;
new_block->page_size = qemu_real_host_page_size();
new_block->host = host;
new_block->flags = ram_flags;
@@ -2050,7 +2100,7 @@ RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags,
MemoryRegion *mr, Error **errp)
{
assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE)) == 0);
assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE | RAM_GUEST_MEMFD)) == 0);
return qemu_ram_alloc_internal(size, size, NULL, NULL, ram_flags, mr, errp);
}
@@ -2078,6 +2128,11 @@ static void reclaim_ramblock(RAMBlock *block)
} else {
qemu_anon_ram_free(block->host, block->max_length);
}
if (block->guest_memfd >= 0) {
close(block->guest_memfd);
}
g_free(block);
}
@@ -3477,17 +3532,16 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
uint8_t *host_startaddr = rb->host + start;
if (!QEMU_PTR_IS_ALIGNED(host_startaddr, rb->page_size)) {
error_report("ram_block_discard_range: Unaligned start address: %p",
host_startaddr);
if (!QEMU_PTR_IS_ALIGNED(host_startaddr, qemu_host_page_size)) {
error_report("%s: Unaligned start address: %p",
__func__, host_startaddr);
goto err;
}
if ((start + length) <= rb->max_length) {
bool need_madvise, need_fallocate;
if (!QEMU_IS_ALIGNED(length, rb->page_size)) {
error_report("ram_block_discard_range: Unaligned length: %zx",
length);
error_report("%s: Unaligned length: %zx", __func__, length);
goto err;
}
@@ -3511,8 +3565,8 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
* proper error message.
*/
if (rb->flags & RAM_READONLY_FD) {
error_report("ram_block_discard_range: Discarding RAM"
" with readonly files is not supported");
error_report("%s: Discarding RAM with readonly files is not"
" supported", __func__);
goto err;
}
@@ -3527,27 +3581,26 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
* file.
*/
if (!qemu_ram_is_shared(rb)) {
warn_report_once("ram_block_discard_range: Discarding RAM"
warn_report_once("%s: Discarding RAM"
" in private file mappings is possibly"
" dangerous, because it will modify the"
" underlying file and will affect other"
" users of the file");
" users of the file", __func__);
}
ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
start, length);
if (ret) {
ret = -errno;
error_report("ram_block_discard_range: Failed to fallocate "
"%s:%" PRIx64 " +%zx (%d)",
rb->idstr, start, length, ret);
error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx (%d)",
__func__, rb->idstr, start, length, ret);
goto err;
}
#else
ret = -ENOSYS;
error_report("ram_block_discard_range: fallocate not available/file"
error_report("%s: fallocate not available/file"
"%s:%" PRIx64 " +%zx (%d)",
rb->idstr, start, length, ret);
__func__, rb->idstr, start, length, ret);
goto err;
#endif
}
@@ -3565,31 +3618,52 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
}
if (ret) {
ret = -errno;
error_report("ram_block_discard_range: Failed to discard range "
error_report("%s: Failed to discard range "
"%s:%" PRIx64 " +%zx (%d)",
rb->idstr, start, length, ret);
__func__, rb->idstr, start, length, ret);
goto err;
}
#else
ret = -ENOSYS;
error_report("ram_block_discard_range: MADVISE not available"
"%s:%" PRIx64 " +%zx (%d)",
rb->idstr, start, length, ret);
error_report("%s: MADVISE not available %s:%" PRIx64 " +%zx (%d)",
__func__, rb->idstr, start, length, ret);
goto err;
#endif
}
trace_ram_block_discard_range(rb->idstr, host_startaddr, length,
need_madvise, need_fallocate, ret);
} else {
error_report("ram_block_discard_range: Overrun block '%s' (%" PRIu64
"/%zx/" RAM_ADDR_FMT")",
rb->idstr, start, length, rb->max_length);
error_report("%s: Overrun block '%s' (%" PRIu64 "/%zx/" RAM_ADDR_FMT")",
__func__, rb->idstr, start, length, rb->max_length);
}
err:
return ret;
}
static int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start,
size_t length)
{
int ret = -1;
#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
ret = fallocate(rb->guest_memfd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
start, length);
if (ret) {
ret = -errno;
error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx (%d)",
__func__, rb->idstr, start, length, ret);
}
#else
ret = -ENOSYS;
error_report("%s: fallocate not available %s:%" PRIx64 " +%zx (%d)",
__func__, rb->idstr, start, length, ret);
#endif
return ret;
}
bool ramblock_is_pmem(RAMBlock *rb)
{
return rb->flags & RAM_PMEM;
@@ -3777,3 +3851,30 @@ bool ram_block_discard_is_required(void)
return qatomic_read(&ram_block_discard_required_cnt) ||
qatomic_read(&ram_block_coordinated_discard_required_cnt);
}
int ram_block_convert_range(RAMBlock *rb, uint64_t start, size_t length,
bool shared_to_private)
{
if (!rb || rb->guest_memfd < 0) {
return -1;
}
if (!QEMU_PTR_IS_ALIGNED(start, qemu_host_page_size) ||
!QEMU_PTR_IS_ALIGNED(length, qemu_host_page_size)) {
return -1;
}
if (!length) {
return -1;
}
if (start + length > rb->max_length) {
return -1;
}
if (shared_to_private) {
return ram_block_discard_range(rb, start, length);
} else {
return ram_block_discard_guest_memfd_range(rb, start, length);
}
}
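
A hedged sketch of the expected caller, pairing the KVM attribute update with the backing-store adjustment above; kvm_set_memory_attributes_private()/_shared() are declared earlier in this series, and the gpa-to-(rb, offset) lookup is elided:

    if (to_private) {
        kvm_set_memory_attributes_private(gpa, size);
        ram_block_convert_range(rb, offset, size, true);  /* drop shared pages */
    } else {
        kvm_set_memory_attributes_shared(gpa, size);
        ram_block_convert_range(rb, offset, size, false); /* punch guest_memfd */
    }
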


@@ -518,6 +518,52 @@ static void qemu_system_wakeup(void)
}
}
static char* tdx_parse_panic_message(char *message)
{
bool printable = false;
char *buf = NULL;
int len = 0, i;
/*
* Although message is defined as a json string, we shouldn't
* unconditionally treat it as is because the guest generated it and
* it's not necessarily trustworthy.
*/
if (message) {
/* The caller guarantees the NUL-terminated string. */
len = strlen(message);
printable = len > 0;
for (i = 0; i < len; i++) {
if (!(0x20 <= message[i] && message[i] <= 0x7e)) {
printable = false;
break;
}
}
}
if (!printable && len) {
/* 3 = length of "%02x " */
buf = g_malloc(len * 3);
for (i = 0; i < len; i++) {
if (message[i] == '\0') {
break;
} else {
sprintf(buf + 3 * i, "%02x ", message[i]);
}
}
if (i > 0) {
/* replace the last ' ' (space) with NUL */
buf[i * 3 - 1] = '\0';
} else {
buf[0] = '\0';
}
return buf;
}
return message;
}
void qemu_system_guest_panicked(GuestPanicInformation *info)
{
qemu_log_mask(LOG_GUEST_ERROR, "Guest crashed");
@@ -559,7 +605,15 @@ void qemu_system_guest_panicked(GuestPanicInformation *info)
S390CrashReason_str(info->u.s390.reason),
info->u.s390.psw_mask,
info->u.s390.psw_addr);
} else if (info->type == GUEST_PANIC_INFORMATION_TYPE_TDX) {
qemu_log_mask(LOG_GUEST_ERROR,
" TDX guest reports fatal error:\"%s\""
" error code: 0x%016" PRIx64 " gpa page: 0x%016" PRIx64 "\n",
tdx_parse_panic_message(info->u.tdx.message),
info->u.tdx.error_code,
info->u.tdx.gpa);
}
qapi_free_GuestPanicInformation(info);
}
}


@@ -2506,7 +2506,7 @@ static void qemu_process_early_options(void)
/* Open the logfile at this point and set the log mask if necessary. */
{
int mask = 0;
int mask = LOG_GUEST_ERROR;
if (log_mask) {
mask = qemu_str_to_log_mask(log_mask);
if (!mask) {


@@ -351,6 +351,7 @@ static void cortex_a8_initfn(Object *obj)
set_feature(&cpu->env, ARM_FEATURE_THUMB2EE);
set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
set_feature(&cpu->env, ARM_FEATURE_EL3);
set_feature(&cpu->env, ARM_FEATURE_PMU);
cpu->midr = 0x410fc080;
cpu->reset_fpsid = 0x410330c0;
cpu->isar.mvfr0 = 0x11110222;
@@ -418,6 +419,7 @@ static void cortex_a9_initfn(Object *obj)
set_feature(&cpu->env, ARM_FEATURE_NEON);
set_feature(&cpu->env, ARM_FEATURE_THUMB2EE);
set_feature(&cpu->env, ARM_FEATURE_EL3);
set_feature(&cpu->env, ARM_FEATURE_PMU);
/*
* Note that A9 supports the MP extensions even for
* A9UP and single-core A9MP (which are both different

View File

@@ -1101,10 +1101,18 @@ uint64_t mte_mops_probe_rev(CPUARMState *env, uint64_t ptr, uint64_t size,
uint32_t n;
mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
/* True probe; this will never fault */
/*
* True probe; this will never fault. Note that our caller passes
* us a pointer to the end of the region, but allocation_tag_mem_probe()
* wants a pointer to the start. Because we know we don't span a page
* boundary and that allocation_tag_mem_probe() doesn't otherwise care
* about the size, pass in a size of 1 byte. This is simpler than
* adjusting the ptr to point to the start of the region and then having
* to adjust the returned 'mem' to get the end of the tag memory.
*/
mem = allocation_tag_mem_probe(env, mmu_idx, ptr,
w ? MMU_DATA_STORE : MMU_DATA_LOAD,
size, MMU_DATA_LOAD, true, 0);
1, MMU_DATA_LOAD, true, 0);
if (!mem) {
return size;
}


@@ -2351,6 +2351,8 @@ static bool trans_SVC(DisasContext *s, arg_i *a)
static bool trans_HVC(DisasContext *s, arg_i *a)
{
int target_el = s->current_el == 3 ? 3 : 2;
if (s->current_el == 0) {
unallocated_encoding(s);
return true;
@@ -2363,7 +2365,7 @@ static bool trans_HVC(DisasContext *s, arg_i *a)
gen_helper_pre_hvc(tcg_env);
/* Architecture requires ss advance before we do the actual work */
gen_ss_advance(s);
gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(a->imm), 2);
gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(a->imm), target_el);
return true;
}


@@ -14,7 +14,8 @@
# define TARGET_PHYS_ADDR_SPACE_BITS 32
# define TARGET_VIRT_ADDR_SPACE_BITS 32
#else
# define TARGET_PHYS_ADDR_SPACE_BITS 64
/* ??? PA-8000 through 8600 have 40 bits; PA-8700 and 8900 have 44 bits. */
# define TARGET_PHYS_ADDR_SPACE_BITS 40
# define TARGET_VIRT_ADDR_SPACE_BITS 64
#endif


@@ -31,23 +31,25 @@
basis. It's probably easier to fall back to a strong memory model. */
#define TCG_GUEST_DEFAULT_MO TCG_MO_ALL
#define MMU_KERNEL_IDX 7
#define MMU_KERNEL_P_IDX 8
#define MMU_PL1_IDX 9
#define MMU_PL1_P_IDX 10
#define MMU_PL2_IDX 11
#define MMU_PL2_P_IDX 12
#define MMU_USER_IDX 13
#define MMU_USER_P_IDX 14
#define MMU_PHYS_IDX 15
#define MMU_ABS_W_IDX 6
#define MMU_ABS_IDX 7
#define MMU_KERNEL_IDX 8
#define MMU_KERNEL_P_IDX 9
#define MMU_PL1_IDX 10
#define MMU_PL1_P_IDX 11
#define MMU_PL2_IDX 12
#define MMU_PL2_P_IDX 13
#define MMU_USER_IDX 14
#define MMU_USER_P_IDX 15
#define MMU_IDX_MMU_DISABLED(MIDX) ((MIDX) < MMU_KERNEL_IDX)
#define MMU_IDX_TO_PRIV(MIDX) (((MIDX) - MMU_KERNEL_IDX) / 2)
#define MMU_IDX_TO_P(MIDX) (((MIDX) - MMU_KERNEL_IDX) & 1)
#define PRIV_P_TO_MMU_IDX(PRIV, P) ((PRIV) * 2 + !!(P) + MMU_KERNEL_IDX)
#define TARGET_INSN_START_EXTRA_WORDS 2
/* No need to flush MMU_PHYS_IDX */
/* No need to flush MMU_ABS*_IDX */
#define HPPA_MMU_FLUSH_MASK \
(1 << MMU_KERNEL_IDX | 1 << MMU_KERNEL_P_IDX | \
1 << MMU_PL1_IDX | 1 << MMU_PL1_P_IDX | \
@@ -287,7 +289,8 @@ static inline int cpu_mmu_index(CPUHPPAState *env, bool ifetch)
if (env->psw & (ifetch ? PSW_C : PSW_D)) {
return PRIV_P_TO_MMU_IDX(env->iaoq_f & 3, env->psw & PSW_P);
}
return MMU_PHYS_IDX; /* mmu disabled */
/* mmu disabled */
return env->psw & PSW_W ? MMU_ABS_W_IDX : MMU_ABS_IDX;
#endif
}


@@ -126,7 +126,7 @@ void hppa_cpu_do_interrupt(CPUState *cs)
env->cr[CR_IIASQ] =
hppa_form_gva_psw(old_psw, env->iasq_f, env->iaoq_f) >> 32;
env->cr_back[0] =
hppa_form_gva_psw(old_psw, env->iasq_f, env->iaoq_f) >> 32;
hppa_form_gva_psw(old_psw, env->iasq_b, env->iaoq_b) >> 32;
} else {
env->cr[CR_IIASQ] = 0;
env->cr_back[0] = 0;


@@ -27,41 +27,39 @@
hwaddr hppa_abs_to_phys_pa2_w1(vaddr addr)
{
if (likely(extract64(addr, 58, 4) != 0xf)) {
/* Memory address space */
return addr & MAKE_64BIT_MASK(0, 62);
}
if (extract64(addr, 54, 4) != 0) {
/* I/O address space */
return addr | MAKE_64BIT_MASK(62, 2);
}
/* PDC address space */
return (addr & MAKE_64BIT_MASK(0, 54)) | MAKE_64BIT_MASK(60, 4);
/*
* Figure H-8 "62-bit Absolute Accesses when PSW W-bit is 1" describes
* an algorithm in which a 62-bit absolute address is transformed to
* a 64-bit physical address. This must then be combined with that
* pictured in Figure H-11 "Physical Address Space Mapping", in which
* the full physical address is truncated to the N-bit physical address
* supported by the implementation.
*
* Since the supported physical address space is below 54 bits, the
* H-8 algorithm is moot and all that is left is to truncate.
*/
QEMU_BUILD_BUG_ON(TARGET_PHYS_ADDR_SPACE_BITS > 54);
return sextract64(addr, 0, TARGET_PHYS_ADDR_SPACE_BITS);
}
hwaddr hppa_abs_to_phys_pa2_w0(vaddr addr)
{
/*
* See Figure H-10, "Absolute Accesses when PSW W-bit is 0",
* combined with Figure H-11, as above.
*/
if (likely(extract32(addr, 28, 4) != 0xf)) {
/* Memory address space */
return addr & MAKE_64BIT_MASK(0, 32);
}
if (extract32(addr, 24, 4) != 0) {
addr = (uint32_t)addr;
} else if (extract32(addr, 24, 4) != 0) {
/* I/O address space */
return addr | MAKE_64BIT_MASK(32, 32);
}
/* PDC address space */
return (addr & MAKE_64BIT_MASK(0, 24)) | MAKE_64BIT_MASK(60, 4);
}
static hwaddr hppa_abs_to_phys(CPUHPPAState *env, vaddr addr)
{
if (!hppa_is_pa20(env)) {
return addr;
} else if (env->psw & PSW_W) {
return hppa_abs_to_phys_pa2_w1(addr);
addr = (int32_t)addr;
} else {
return hppa_abs_to_phys_pa2_w0(addr);
/* PDC address space */
addr &= MAKE_64BIT_MASK(0, 24);
addr |= -1ull << (TARGET_PHYS_ADDR_SPACE_BITS - 4);
}
return addr;
}
static HPPATLBEntry *hppa_find_tlb(CPUHPPAState *env, vaddr addr)
@@ -161,9 +159,22 @@ int hppa_get_physical_address(CPUHPPAState *env, vaddr addr, int mmu_idx,
*tlb_entry = NULL;
}
/* Virtual translation disabled. Direct map virtual to physical. */
if (mmu_idx == MMU_PHYS_IDX) {
phys = addr;
/* Virtual translation disabled. Map absolute to physical. */
if (MMU_IDX_MMU_DISABLED(mmu_idx)) {
switch (mmu_idx) {
case MMU_ABS_W_IDX:
phys = hppa_abs_to_phys_pa2_w1(addr);
break;
case MMU_ABS_IDX:
if (hppa_is_pa20(env)) {
phys = hppa_abs_to_phys_pa2_w0(addr);
} else {
phys = (uint32_t)addr;
}
break;
default:
g_assert_not_reached();
}
prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
goto egress;
}
@@ -261,7 +272,7 @@ int hppa_get_physical_address(CPUHPPAState *env, vaddr addr, int mmu_idx,
}
egress:
*pphys = phys = hppa_abs_to_phys(env, phys);
*pphys = phys;
*pprot = prot;
trace_hppa_tlb_get_physical_address(env, ret, prot, addr, phys);
return ret;
@@ -271,16 +282,15 @@ hwaddr hppa_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
{
HPPACPU *cpu = HPPA_CPU(cs);
hwaddr phys;
int prot, excp;
int prot, excp, mmu_idx;
/* If the (data) mmu is disabled, bypass translation. */
/* ??? We really ought to know if the code mmu is disabled too,
in order to get the correct debugging dumps. */
if (!(cpu->env.psw & PSW_D)) {
return hppa_abs_to_phys(&cpu->env, addr);
}
mmu_idx = (cpu->env.psw & PSW_D ? MMU_KERNEL_IDX :
cpu->env.psw & PSW_W ? MMU_ABS_W_IDX : MMU_ABS_IDX);
excp = hppa_get_physical_address(&cpu->env, addr, MMU_KERNEL_IDX, 0,
excp = hppa_get_physical_address(&cpu->env, addr, mmu_idx, 0,
&phys, &prot, NULL);
/* Since we're translating for debugging, the only error that is a
@@ -367,8 +377,8 @@ bool hppa_cpu_tlb_fill(CPUState *cs, vaddr addr, int size,
trace_hppa_tlb_fill_excp(env, addr, size, type, mmu_idx);
/* Failure. Raise the indicated exception. */
raise_exception_with_ior(env, excp, retaddr, addr,
MMU_IDX_MMU_DISABLED(mmu_idx));
}
trace_hppa_tlb_fill_success(env, addr & TARGET_PAGE_MASK,
@@ -450,7 +460,7 @@ static void itlbt_pa20(CPUHPPAState *env, target_ulong r1,
int mask_shift;
mask_shift = 2 * (r1 & 0xf);
va_size = (uint64_t)TARGET_PAGE_SIZE << mask_shift;
va_b &= -va_size;
va_e = va_b + va_size - 1;
@@ -459,7 +469,14 @@ static void itlbt_pa20(CPUHPPAState *env, target_ulong r1,
ent->itree.start = va_b;
ent->itree.last = va_e;
/* Extract all 52 bits present in the page table entry. */
ent->pa = r1 << (TARGET_PAGE_BITS - 5);
/* Align per the page size. */
ent->pa &= TARGET_PAGE_MASK << mask_shift;
/* Ignore the bits beyond physical address space. */
ent->pa = sextract64(ent->pa, 0, TARGET_PHYS_ADDR_SPACE_BITS);
ent->t = extract64(r2, 61, 1);
ent->d = extract64(r2, 60, 1);
ent->b = extract64(r2, 59, 1);
@@ -505,7 +522,7 @@ static void ptlb_work(CPUState *cpu, run_on_cpu_data data)
*/
end = start & 0xf;
start &= TARGET_PAGE_MASK;
end = (vaddr)TARGET_PAGE_SIZE << (2 * end);
end = start + end - 1;
hppa_flush_tlb_range(env, start, end);
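
As a worked example of the range encoding above (a sketch, assuming hppa's 4 KiB TARGET_PAGE_SIZE; the operand value is illustrative only): the low nibble of the operand selects a power-of-four multiple of the page size.

/* Hypothetical demo of the ptlb range encoding; assumes 4 KiB pages. */
#include <inttypes.h>
#include <stdio.h>

int main(void)
{
    uint64_t op = 0x00010003;                          /* low nibble = 3 */
    uint64_t start = op & ~(uint64_t)0xfff;            /* TARGET_PAGE_MASK */
    uint64_t len = (uint64_t)4096 << (2 * (op & 0xf));
    printf("flush [0x%" PRIx64 ", 0x%" PRIx64 "]\n", start, start + len - 1);
    /* prints: flush [0x10000, 0x4ffff] -- i.e. a 256 KiB range */
    return 0;
}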


@@ -338,7 +338,7 @@ target_ulong HELPER(probe)(CPUHPPAState *env, target_ulong addr,
#ifdef CONFIG_USER_ONLY
return page_check_range(addr, 1, want);
#else
int prot, excp, mmu_idx;
hwaddr phys;
trace_hppa_tlb_probe(addr, level, want);
@@ -347,7 +347,8 @@ target_ulong HELPER(probe)(CPUHPPAState *env, target_ulong addr,
return 0;
}
mmu_idx = PRIV_P_TO_MMU_IDX(level, env->psw & PSW_P);
excp = hppa_get_physical_address(env, addr, mmu_idx, 0, &phys,
&prot, NULL);
if (excp >= 0) {
if (env->psw & PSW_Q) {


@@ -69,19 +69,24 @@ typedef struct DisasContext {
} DisasContext;
#ifdef CONFIG_USER_ONLY
#define UNALIGN(C) (C)->unalign
#define MMU_DISABLED(C) false
#else
#define UNALIGN(C) MO_ALIGN
#define MMU_DISABLED(C) MMU_IDX_MMU_DISABLED((C)->mmu_idx)
#endif
/* Note that ssm/rsm instructions number PSW_W and PSW_E differently. */
static int expand_sm_imm(DisasContext *ctx, int val)
{
/* Keep unimplemented bits disabled -- see cpu_hppa_put_psw. */
if (ctx->is_pa20) {
if (val & PSW_SM_W) {
val |= PSW_W;
}
val &= ~(PSW_SM_W | PSW_SM_E | PSW_G);
} else {
val &= ~(PSW_SM_W | PSW_SM_E | PSW_O);
}
return val;
}
@@ -1372,7 +1377,7 @@ static void do_load_32(DisasContext *ctx, TCGv_i32 dest, unsigned rb,
assert(ctx->null_cond.c == TCG_COND_NEVER);
form_gva(ctx, &addr, &ofs, rb, rx, scale, disp, sp, modify,
MMU_DISABLED(ctx));
tcg_gen_qemu_ld_i32(dest, addr, ctx->mmu_idx, mop | UNALIGN(ctx));
if (modify) {
save_gpr(ctx, rb, ofs);
@@ -1390,7 +1395,7 @@ static void do_load_64(DisasContext *ctx, TCGv_i64 dest, unsigned rb,
assert(ctx->null_cond.c == TCG_COND_NEVER);
form_gva(ctx, &addr, &ofs, rb, rx, scale, disp, sp, modify,
MMU_DISABLED(ctx));
tcg_gen_qemu_ld_i64(dest, addr, ctx->mmu_idx, mop | UNALIGN(ctx));
if (modify) {
save_gpr(ctx, rb, ofs);
@@ -1408,7 +1413,7 @@ static void do_store_32(DisasContext *ctx, TCGv_i32 src, unsigned rb,
assert(ctx->null_cond.c == TCG_COND_NEVER);
form_gva(ctx, &addr, &ofs, rb, rx, scale, disp, sp, modify,
MMU_DISABLED(ctx));
tcg_gen_qemu_st_i32(src, addr, ctx->mmu_idx, mop | UNALIGN(ctx));
if (modify) {
save_gpr(ctx, rb, ofs);
@@ -1426,7 +1431,7 @@ static void do_store_64(DisasContext *ctx, TCGv_i64 src, unsigned rb,
assert(ctx->null_cond.c == TCG_COND_NEVER);
form_gva(ctx, &addr, &ofs, rb, rx, scale, disp, sp, modify,
MMU_DISABLED(ctx));
tcg_gen_qemu_st_i64(src, addr, ctx->mmu_idx, mop | UNALIGN(ctx));
if (modify) {
save_gpr(ctx, rb, ofs);
@@ -2294,7 +2299,7 @@ static bool trans_probe(DisasContext *ctx, arg_probe *a)
form_gva(ctx, &addr, &ofs, a->b, 0, 0, 0, a->sp, 0, false);
if (a->imm) {
level = tcg_constant_i32(a->ri & 3);
} else {
level = tcg_temp_new_i32();
tcg_gen_extrl_i64_i32(level, load_gpr(ctx, a->ri));
@@ -3075,7 +3080,7 @@ static bool trans_ldc(DisasContext *ctx, arg_ldst *a)
}
form_gva(ctx, &addr, &ofs, a->b, a->x, a->scale ? a->size : 0,
a->disp, a->sp, a->m, MMU_DISABLED(ctx));
/*
* For hppa1.1, LDCW is undefined unless aligned mod 16.
@@ -3105,7 +3110,7 @@ static bool trans_stby(DisasContext *ctx, arg_stby *a)
nullify_over(ctx);
form_gva(ctx, &addr, &ofs, a->b, 0, 0, a->disp, a->sp, a->m,
MMU_DISABLED(ctx));
val = load_gpr(ctx, a->r);
if (a->a) {
if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
@@ -3139,7 +3144,7 @@ static bool trans_stdby(DisasContext *ctx, arg_stby *a)
nullify_over(ctx);
form_gva(ctx, &addr, &ofs, a->b, 0, 0, a->disp, a->sp, a->m,
MMU_DISABLED(ctx));
val = load_gpr(ctx, a->r);
if (a->a) {
if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
@@ -3167,7 +3172,7 @@ static bool trans_lda(DisasContext *ctx, arg_ldst *a)
int hold_mmu_idx = ctx->mmu_idx;
CHECK_MOST_PRIVILEGED(EXCP_PRIV_OPR);
ctx->mmu_idx = ctx->tb_flags & PSW_W ? MMU_ABS_W_IDX : MMU_ABS_IDX;
trans_ld(ctx, a);
ctx->mmu_idx = hold_mmu_idx;
return true;
@@ -3178,7 +3183,7 @@ static bool trans_sta(DisasContext *ctx, arg_ldst *a)
int hold_mmu_idx = ctx->mmu_idx;
CHECK_MOST_PRIVILEGED(EXCP_PRIV_OPR);
ctx->mmu_idx = ctx->tb_flags & PSW_W ? MMU_ABS_W_IDX : MMU_ABS_IDX;
trans_st(ctx, a);
ctx->mmu_idx = hold_mmu_idx;
return true;
@@ -4430,7 +4435,7 @@ static void hppa_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
ctx->privilege = (ctx->tb_flags >> TB_FLAG_PRIV_SHIFT) & 3;
ctx->mmu_idx = (ctx->tb_flags & PSW_D
? PRIV_P_TO_MMU_IDX(ctx->privilege, ctx->tb_flags & PSW_P)
: ctx->tb_flags & PSW_W ? MMU_ABS_W_IDX : MMU_ABS_IDX);
/* Recover the IAOQ values from the GVA + PRIV. */
uint64_t cs_base = ctx->base.tb->cs_base;


@@ -20,6 +20,15 @@
#ifndef I386_CPU_INTERNAL_H
#define I386_CPU_INTERNAL_H
typedef struct FeatureMask {
FeatureWord index;
uint64_t mask;
} FeatureMask;
typedef struct FeatureDep {
FeatureMask from, to;
} FeatureDep;
typedef enum FeatureWordType {
CPUID_FEATURE_WORD,
MSR_FEATURE_WORD,
@@ -56,6 +65,7 @@ typedef struct FeatureWordInfo {
extern FeatureWordInfo feature_word_info[];
void x86_cpu_expand_features(X86CPU *cpu, Error **errp);
char *feature_word_description(FeatureWordInfo *f, uint32_t bit);
#ifndef CONFIG_USER_ONLY
GuestPanicInformation *x86_cpu_get_crash_info(CPUState *cs);


@@ -41,6 +41,7 @@
#include "exec/address-spaces.h"
#include "hw/boards.h"
#include "hw/i386/sgx-epc.h"
#include "kvm/tdx.h"
#endif
#include "disas/capstone.h"
@@ -1442,15 +1443,6 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
},
};
static FeatureDep feature_dependencies[] = {
{
.from = { FEAT_7_0_EDX, CPUID_7_0_EDX_ARCH_CAPABILITIES },
@@ -1575,9 +1567,6 @@ static const X86RegisterInfo32 x86_reg_info_32[CPU_NB_REGS32] = {
};
#undef REGISTER
ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT] = {
[XSTATE_FP_BIT] = {
/* x87 FP state component is always enabled if XSAVE is supported */
@@ -4099,6 +4088,113 @@ static const X86CPUDefinition builtin_x86_defs[] = {
{ /* end of list */ },
},
},
{
.name = "TDX-SapphireRapids",
.level = 0x21,
.vendor = CPUID_VENDOR_INTEL,
.family = 6,
.model = 143,
.stepping = 4,
.features[FEAT_1_EDX] =
/* CPUID_CONFIG: CPUID_ACPI CPUID_HT CPUID_TM CPUID_PBE */
CPUID_FP87 | CPUID_VME | CPUID_DE | CPUID_PSE | CPUID_TSC |
CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC |
CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV |
CPUID_PAT | CPUID_PSE36 | CPUID_CLFLUSH | CPUID_DTS |
CPUID_MMX | CPUID_FXSR | CPUID_SSE |
CPUID_SSE2 | CPUID_SS,
.features[FEAT_1_ECX] =
/*
* CPUID_CONFIG: CPUID_EXT_EST CPUID_EXT_TM2 CPUID_EXT_XTPR
* XFAM: CPUID_EXT_FMA CPUID_EXT_AVX CPUID_EXT_F16C
*/
CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_DTES64 |
CPUID_EXT_DSCPL | CPUID_EXT_SSSE3 | CPUID_EXT_FMA |
CPUID_EXT_CX16 | CPUID_EXT_PDCM | CPUID_EXT_PCID |
CPUID_EXT_SSE41 | CPUID_EXT_SSE42 | CPUID_EXT_X2APIC |
CPUID_EXT_MOVBE | CPUID_EXT_POPCNT |
CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_AES |
CPUID_EXT_XSAVE | CPUID_EXT_AVX | CPUID_EXT_F16C |
CPUID_EXT_RDRAND,
.features[FEAT_8000_0001_EDX] =
CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | CPUID_EXT2_PDPE1GB |
CPUID_EXT2_RDTSCP | CPUID_EXT2_LM,
.features[FEAT_8000_0001_ECX] =
CPUID_EXT3_LAHF_LM | CPUID_EXT3_ABM | CPUID_EXT3_3DNOWPREFETCH,
.features[FEAT_7_0_EBX] =
/*
* CPUID_CONFIG: CPUID_7_0_EBX_BMI1 CPUID_7_0_EBX_BMI2
* CPUID_7_0_EBX_PQM CPUID_7_0_EBX_CQE
* CPUID_7_0_EBX_ADX
* XFAM: CPUID_7_0_EBX_AVX2 CPUID_7_0_EBX_AVX512F
* CPUID_7_0_EBX_AVX512DQ CPUID_7_0_EBX_AVX512IFMA
* CPUID_7_0_EBX_INTEL_PT CPUID_7_0_EBX_AVX512CD
* CPUID_7_0_EBX_AVX512BW CPUID_7_0_EBX_AVX512VL
*
* CPUID_7_0_EBX_AVX512PF CPUID_7_0_EBX_AVX512ER not
* supported on SPR-D
*/
CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 |
CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 |
CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 |
CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID |
CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_AVX512F |
CPUID_7_0_EBX_AVX512DQ | CPUID_7_0_EBX_RDSEED |
CPUID_7_0_EBX_ADX | CPUID_7_0_EBX_SMAP |
CPUID_7_0_EBX_AVX512IFMA | CPUID_7_0_EBX_CLFLUSHOPT |
CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_AVX512CD |
CPUID_7_0_EBX_SHA_NI | CPUID_7_0_EBX_AVX512BW |
CPUID_7_0_EBX_AVX512VL,
.features[FEAT_7_0_ECX] =
/*
* CPUID_CONFIG: CPUID_7_0_ECX_WAITPKG CPUID_7_0_ECX_TME
* XFAM: CPUID_7_0_ECX_AVX512_VBMI CPUID_7_0_ECX_PKU
* CPUID_7_0_ECX_AVX512_VBMI2 CPUID_7_0_ECX_CET_SHSTK
* CPUID_7_0_ECX_VAES CPUID_7_0_ECX_VPCLMULQDQ
* CPUID_7_0_ECX_AVX512VNNI CPUID_7_0_ECX_AVX512BITALG
* CPUID_7_0_ECX_AVX512_VPOPCNTDQ
* ATTRIBUTE: CPUID_7_0_ECX_PKS
*/
CPUID_7_0_ECX_AVX512_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU |
CPUID_7_0_ECX_WAITPKG | CPUID_7_0_ECX_AVX512_VBMI2 |
CPUID_7_0_ECX_GFNI | CPUID_7_0_ECX_VAES |
CPUID_7_0_ECX_VPCLMULQDQ | CPUID_7_0_ECX_AVX512VNNI |
CPUID_7_0_ECX_AVX512BITALG | CPUID_7_0_ECX_AVX512_VPOPCNTDQ |
CPUID_7_0_ECX_LA57 | CPUID_7_0_ECX_RDPID |
CPUID_7_0_ECX_BUS_LOCK_DETECT | CPUID_7_0_ECX_CLDEMOTE |
CPUID_7_0_ECX_MOVDIRI | CPUID_7_0_ECX_MOVDIR64B,
.features[FEAT_7_0_EDX] =
/*
* XFAM: CPUID_7_0_EDX_ULI CPUID_7_0_EDX_ARCH_LBR
* CPUID_7_0_EDX_AVX512_VP2INTERSECT
* CPUID_7_0_EDX_AMX_BF16 CPUID_7_0_EDX_CET_IBT
* CPUID_7_0_EDX_AMX_INT8 CPUID_7_0_EDX_AMX_TILE
*
*
* CPUID_7_0_EDX_AVX512_4VNNIW CPUID_7_0_EDX_AVX512_4FMAPS
* not supported
*/
CPUID_7_0_EDX_FSRM | CPUID_7_0_EDX_SERIALIZE |
CPUID_7_0_EDX_TSX_LDTRK | CPUID_7_0_EDX_AMX_BF16 |
CPUID_7_0_EDX_AVX512_FP16 | CPUID_7_0_EDX_AMX_TILE |
CPUID_7_0_EDX_AMX_INT8 | CPUID_7_0_EDX_SPEC_CTRL |
CPUID_7_0_EDX_STIBP | CPUID_7_0_EDX_ARCH_CAPABILITIES |
CPUID_7_0_EDX_CORE_CAPABILITY | CPUID_7_0_EDX_SPEC_CTRL_SSBD,
.features[FEAT_XSAVE] =
/*
* XFAM: CPUID_XSAVE_XFD depend on all of XFAM[]
*/
CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC |
CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES,
.features[FEAT_6_EAX] =
CPUID_6_EAX_ARAT,
.features[FEAT_KVM] = (1ULL << KVM_FEATURE_NOP_IO_DELAY) |
(1ULL << KVM_FEATURE_PV_UNHALT) | (1ULL << KVM_FEATURE_PV_TLB_FLUSH) |
(1ULL << KVM_FEATURE_PV_SEND_IPI) | (1ULL << KVM_FEATURE_POLL_CONTROL) |
(1ULL << KVM_FEATURE_PV_SCHED_YIELD) | (1ULL << KVM_FEATURE_MSI_EXT_DEST_ID),
.xlevel = 0x80000008,
.model_id = "Genuine Intel(R) CPU",
},
{
.name = "Denverton",
.level = 21,
@@ -4998,7 +5094,7 @@ static const TypeInfo max_x86_cpu_type_info = {
.class_init = max_x86_cpu_class_init,
};
char *feature_word_description(FeatureWordInfo *f, uint32_t bit)
{
assert(f->type == CPUID_FEATURE_WORD || f->type == MSR_FEATURE_WORD);
@@ -5031,8 +5127,8 @@ static bool x86_cpu_have_filtered_features(X86CPU *cpu)
return false;
}
void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask,
const char *verbose_prefix)
{
CPUX86State *env = &cpu->env;
FeatureWordInfo *f = &feature_word_info[w];
@@ -5724,6 +5820,12 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
: CPUID_EXT2_LM;
r &= ~unavail;
}
#endif
#ifndef CONFIG_USER_ONLY
/* skip migratable flag trim in TD VM case */
if (is_tdx_vm() && wi->type == CPUID_FEATURE_WORD) {
return r;
}
#endif
if (migratable_only) {
r &= x86_cpu_get_migratable_flags(w);
@@ -5911,10 +6013,12 @@ static void x86_cpu_load_model(X86CPU *cpu, X86CPUModel *model)
/*
* Properties in versioned CPU model are not user specified features.
* We can simply clear env->user_features here since it will be filled later
* in x86_cpu_expand_features() based on plus_features and minus_features.
* We can simply clear env->user_{plus/minus}_features here since it will
* be filled later in x86_cpu_expand_features() based on plus_features and
* minus_features.
*/
memset(&env->user_plus_features, 0, sizeof(env->user_plus_features));
memset(&env->user_minus_features, 0, sizeof(env->user_minus_features));
}
static const gchar *x86_gdb_arch_name(CPUState *cs)
@@ -6061,6 +6165,15 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
*ebx |= (cs->nr_cores * cs->nr_threads) << 16;
*edx |= CPUID_HT;
}
/*
* tdx_fixed0/1 are only reflected in env->features[].
* Avoid breaking the tdx_fixed bits when the PMU is disabled and TDX is enabled.
*/
#ifndef CONFIG_USER_ONLY
if (is_tdx_vm()) {
break;
}
#endif
if (!cpu->enable_pmu) {
*ecx &= ~CPUID_EXT_PDCM;
}
@@ -7010,6 +7123,12 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp)
}
}
#ifndef CONFIG_USER_ONLY
if (is_tdx_vm()) {
tdx_check_minus_features(CPU(cpu));
}
#endif
/*TODO: Now cpu->max_features doesn't overwrite features
* set using QOM properties, and we can convert
* plus_features & minus_features to global properties
@@ -7022,7 +7141,7 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp)
*/
env->features[w] |=
x86_cpu_get_supported_feature_word(w, cpu->migratable) &
~env->user_minus_features[w] &
~feature_word_info[w].no_autoenable_flags;
}
}
@@ -7034,7 +7153,7 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp)
/* Not an error unless the dependent feature was added explicitly. */
mark_unavailable_features(cpu, d->to.index,
unavailable_features & env->user_plus_features[d->to.index],
"This feature depends on other features that were not requested");
env->features[d->to.index] &= ~unavailable_features;
@@ -7048,6 +7167,12 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp)
x86_cpu_enable_xsave_components(cpu);
/* CPUID[EAX=7,ECX=0].EBX always increased level automatically: */
#ifndef CONFIG_USER_ONLY
if (is_tdx_vm()) {
tdx_apply_xfam_dependencies(CPU(cpu));
}
#endif
x86_cpu_adjust_feat_level(cpu, FEAT_7_0_EBX);
if (cpu->full_cpuid_auto_level) {
x86_cpu_adjust_feat_level(cpu, FEAT_1_EDX);
@@ -7367,6 +7492,16 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp)
if (cpu->phys_bits == 0) {
cpu->phys_bits = TCG_PHYS_ADDR_BITS;
}
#ifndef CONFIG_USER_ONLY
if (is_tdx_vm()) {
/*
* Some CPUID bits can't be reflected in env->features[],
* but they still have restrictions in TDX VM.
* e.g. physical address bits is fixed to 0x34.
*/
cpu->phys_bits = TDX_PHYS_ADDR_BITS;
}
#endif
} else {
/* For 32 bit systems don't use the user set value, but keep
* phys_bits consistent with what we tell the guest.
@@ -7517,12 +7652,18 @@ static void x86_cpu_set_bit_prop(Object *obj, Visitor *v, const char *name,
return;
}
/*
* As minus has precedence over plus, if a feature exists in both
* user_plus_features and user_minus_features, the minus takes
* precedence.
*/
if (value) {
cpu->env.features[fp->w] |= fp->mask;
cpu->env.user_plus_features[fp->w] |= fp->mask;
} else {
cpu->env.features[fp->w] &= ~fp->mask;
cpu->env.user_minus_features[fp->w] |= fp->mask;
}
}
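
A compressed sketch of the precedence rule documented above (hypothetical helper, not in the patch): when the two masks are later combined, minus is applied after plus, so a feature named in both ends up disabled.

/* Hypothetical reduction of the minus-over-plus rule for one feature word. */
static uint64_t apply_user_overrides(uint64_t base, uint64_t user_plus,
                                     uint64_t user_minus)
{
    return (base | user_plus) & ~user_minus;   /* minus wins on conflict */
}

For example, a hypothetical command line such as "-cpu Model,+waitpkg,-waitpkg" records the bit in both masks and leaves it clear.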
/* Register a boolean property to get/set a single bit in a uint32_t field.


@@ -553,7 +553,11 @@ typedef enum X86Seg {
#define XSTATE_OPMASK_BIT 5
#define XSTATE_ZMM_Hi256_BIT 6
#define XSTATE_Hi16_ZMM_BIT 7
#define XSTATE_RTIT_BIT 8
#define XSTATE_PKRU_BIT 9
#define XSTATE_CET_U_BIT 11
#define XSTATE_CET_S_BIT 12
#define XSTATE_UINTR_BIT 14
#define XSTATE_ARCH_LBR_BIT 15
#define XSTATE_XTILE_CFG_BIT 17
#define XSTATE_XTILE_DATA_BIT 18
@@ -566,13 +570,25 @@ typedef enum X86Seg {
#define XSTATE_OPMASK_MASK (1ULL << XSTATE_OPMASK_BIT)
#define XSTATE_ZMM_Hi256_MASK (1ULL << XSTATE_ZMM_Hi256_BIT)
#define XSTATE_Hi16_ZMM_MASK (1ULL << XSTATE_Hi16_ZMM_BIT)
#define XSTATE_RTIT_MASK (1ULL << XSTATE_RTIT_BIT)
#define XSTATE_PKRU_MASK (1ULL << XSTATE_PKRU_BIT)
#define XSTATE_CET_U_MASK (1ULL << XSTATE_CET_U_BIT)
#define XSTATE_CET_S_MASK (1ULL << XSTATE_CET_S_BIT)
#define XSTATE_UINTR_MASK (1ULL << XSTATE_UINTR_BIT)
#define XSTATE_ARCH_LBR_MASK (1ULL << XSTATE_ARCH_LBR_BIT)
#define XSTATE_XTILE_CFG_MASK (1ULL << XSTATE_XTILE_CFG_BIT)
#define XSTATE_XTILE_DATA_MASK (1ULL << XSTATE_XTILE_DATA_BIT)
#define XSTATE_DYNAMIC_MASK (XSTATE_XTILE_DATA_MASK)
#define XSTATE_AVX_512_MASK (XSTATE_OPMASK_MASK | \
XSTATE_ZMM_Hi256_MASK | \
XSTATE_Hi16_ZMM_MASK)
#define XSTATE_CET_MASK (XSTATE_CET_U_MASK | \
XSTATE_CET_S_MASK)
#define XSTATE_AMX_MASK (XSTATE_XTILE_CFG_MASK | \
XSTATE_XTILE_DATA_MASK)
#define ESA_FEATURE_ALIGN64_BIT 1
#define ESA_FEATURE_XFD_BIT 2
@@ -588,6 +604,9 @@ typedef enum X86Seg {
XSTATE_Hi16_ZMM_MASK | XSTATE_PKRU_MASK | \
XSTATE_XTILE_CFG_MASK | XSTATE_XTILE_DATA_MASK)
/* CPUID feature bits available in XSS */
#define CPUID_XSTATE_XSS_MASK (XSTATE_ARCH_LBR_MASK)
/* CPUID feature words */
typedef enum FeatureWord {
FEAT_1_EDX, /* CPUID[1].EDX */
@@ -780,6 +799,8 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
/* Support RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE */
#define CPUID_7_0_EBX_FSGSBASE (1U << 0)
/* Support for TSC adjustment MSR 0x3B */
#define CPUID_7_0_EBX_TSC_ADJUST (1U << 1)
/* Support SGX */
#define CPUID_7_0_EBX_SGX (1U << 2)
/* 1st Group of Advanced Bit Manipulation Extensions */
@@ -798,8 +819,12 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
#define CPUID_7_0_EBX_INVPCID (1U << 10)
/* Restricted Transactional Memory */
#define CPUID_7_0_EBX_RTM (1U << 11)
/* Cache QoS Monitoring */
#define CPUID_7_0_EBX_PQM (1U << 12)
/* Memory Protection Extension */
#define CPUID_7_0_EBX_MPX (1U << 14)
/* Resource Director Technology Allocation */
#define CPUID_7_0_EBX_RDT_A (1U << 15)
/* AVX-512 Foundation */
#define CPUID_7_0_EBX_AVX512F (1U << 16)
/* AVX-512 Doubleword & Quadword Instruction */
@@ -845,6 +870,8 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
#define CPUID_7_0_ECX_WAITPKG (1U << 5)
/* Additional AVX-512 Vector Byte Manipulation Instruction */
#define CPUID_7_0_ECX_AVX512_VBMI2 (1U << 6)
/* CET SHSTK feature */
#define CPUID_7_0_ECX_CET_SHSTK (1U << 7)
/* Galois Field New Instructions */
#define CPUID_7_0_ECX_GFNI (1U << 8)
/* Vector AES Instructions */
@@ -855,12 +882,20 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
#define CPUID_7_0_ECX_AVX512VNNI (1U << 11)
/* Support for VPOPCNT[B,W] and VPSHUFBITQMB */
#define CPUID_7_0_ECX_AVX512BITALG (1U << 12)
/* Intel Total Memory Encryption */
#define CPUID_7_0_ECX_TME (1U << 13)
/* POPCNT for vectors of DW/QW */
#define CPUID_7_0_ECX_AVX512_VPOPCNTDQ (1U << 14)
/* Placeholder for bit 15 */
#define CPUID_7_0_ECX_FZM (1U << 15)
/* 5-level Page Tables */
#define CPUID_7_0_ECX_LA57 (1U << 16)
/* MAWAU for MPX */
#define CPUID_7_0_ECX_MAWAU (31U << 17)
/* Read Processor ID */
#define CPUID_7_0_ECX_RDPID (1U << 22)
/* KeyLocker */
#define CPUID_7_0_ECX_KeyLocker (1U << 23)
/* Bus Lock Debug Exception */
#define CPUID_7_0_ECX_BUS_LOCK_DETECT (1U << 24)
/* Cache Line Demote Instruction */
@@ -869,6 +904,8 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
#define CPUID_7_0_ECX_MOVDIRI (1U << 27)
/* Move 64 Bytes as Direct Store Instruction */
#define CPUID_7_0_ECX_MOVDIR64B (1U << 28)
/* ENQCMD and ENQCMDS instructions */
#define CPUID_7_0_ECX_ENQCMD (1U << 29)
/* Support SGX Launch Control */
#define CPUID_7_0_ECX_SGX_LC (1U << 30)
/* Protection Keys for Supervisor-mode Pages */
@@ -880,14 +917,20 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
#define CPUID_7_0_EDX_AVX512_4FMAPS (1U << 3)
/* Fast Short Rep Mov */
#define CPUID_7_0_EDX_FSRM (1U << 4)
/* User Interrupt Support */
#define CPUID_7_0_EDX_UNIT (1U << 5)
/* AVX512 Vector Pair Intersection to a Pair of Mask Registers */
#define CPUID_7_0_EDX_AVX512_VP2INTERSECT (1U << 8)
/* SERIALIZE instruction */
#define CPUID_7_0_EDX_SERIALIZE (1U << 14)
/* TSX Suspend Load Address Tracking instruction */
#define CPUID_7_0_EDX_TSX_LDTRK (1U << 16)
/* PCONFIG instruction */
#define CPUID_7_0_EDX_PCONFIG (1U << 18)
/* Architectural LBRs */
#define CPUID_7_0_EDX_ARCH_LBR (1U << 19)
/* CET IBT feature */
#define CPUID_7_0_EDX_CET_IBT (1U << 20)
/* AMX_BF16 instruction */
#define CPUID_7_0_EDX_AMX_BF16 (1U << 22)
/* AVX512_FP16 instruction */
@@ -1809,7 +1852,8 @@ typedef struct CPUArchState {
uint32_t cpuid_version;
FeatureWordArray features;
/* Features that were explicitly enabled/disabled */
FeatureWordArray user_plus_features;
FeatureWordArray user_minus_features;
uint32_t cpuid_model[12];
/* Cache information for CPUID. When legacy-cache=on, the cache data
* on each CPUID leaf will be different, because we keep compatibility
@@ -2237,6 +2281,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
void cpu_clear_apic_feature(CPUX86State *env);
void host_cpuid(uint32_t function, uint32_t count,
uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx);
void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask,
const char *verbose_prefix);
/* helper.c */
void x86_cpu_set_a20(X86CPU *cpu, int a20_state);


@@ -14,7 +14,9 @@
#include "qapi/error.h"
#include "sysemu/sysemu.h"
#include "hw/boards.h"
#include "tdx.h"
#include "tdx.h"
#include "kvm_i386.h"
#include "hw/core/accel-cpu.h"
@@ -60,6 +62,10 @@ static bool lmce_supported(void)
if (kvm_ioctl(kvm_state, KVM_X86_GET_MCE_CAP_SUPPORTED, &mce_cap) < 0) {
return false;
}
if (is_tdx_vm()) {
return false;
}
return !!(mce_cap & MCG_LMCE_P);
}
@@ -177,7 +183,9 @@ static void kvm_cpu_instance_init(CPUState *cs)
}
/* Special cases not set in the X86CPUDefinition structs: */
if (!is_tdx_vm()) {
x86_cpu_apply_props(cpu, kvm_default_props);
}
}
if (cpu->max_features) {

View File

@@ -0,0 +1,56 @@
/*
* QEMU KVM x86 specific function stubs
*
* Copyright Linaro Limited 2012
*
* Author: Peter Maydell <peter.maydell@linaro.org>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*
*/
#include "qemu/osdep.h"
#include "cpu.h"
#include "kvm_i386.h"
#ifndef __OPTIMIZE__
bool kvm_has_smm(void)
{
return true;
}
bool kvm_has_pam(void)
{
return true;
}
bool kvm_enable_x2apic(void)
{
return false;
}
/* This function is only called inside conditionals which we
* rely on the compiler to optimize out when CONFIG_KVM is not
* defined.
*/
uint32_t kvm_arch_get_supported_cpuid(KVMState *env, uint32_t function,
uint32_t index, int reg)
{
abort();
}
#endif
bool kvm_hv_vpindex_settable(void)
{
return false;
}
bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp)
{
abort();
}
void kvm_set_max_apic_id(uint32_t max_apic_id)
{
return;
}


@@ -32,6 +32,7 @@
#include "sysemu/runstate.h"
#include "kvm_i386.h"
#include "sev.h"
#include "tdx.h"
#include "xen-emu.h"
#include "hyperv.h"
#include "hyperv-proto.h"
@@ -61,6 +62,7 @@
#include "migration/blocker.h"
#include "exec/memattrs.h"
#include "trace.h"
#include "tdx.h"
#include CONFIG_DEVICES
@@ -161,11 +163,46 @@ static KVMMSRHandlers msr_handlers[KVM_MSR_FILTER_MAX_RANGES];
static RateLimit bus_lock_ratelimit_ctrl;
static int kvm_get_one_msr(X86CPU *cpu, int index, uint64_t *value);
static const char* vm_type_name[] = {
[KVM_X86_DEFAULT_VM] = "default",
[KVM_X86_SW_PROTECTED_VM] = "sw-protected-vm",
[KVM_X86_TDX_VM] = "tdx",
};
int kvm_get_vm_type(MachineState *ms, const char *vm_type)
{
int kvm_type = KVM_X86_DEFAULT_VM;
if (ms->cgs && object_dynamic_cast(OBJECT(ms->cgs), TYPE_TDX_GUEST)) {
kvm_type = KVM_X86_TDX_VM;
}
/*
* Old KVM doesn't support KVM_CAP_VM_TYPES, but KVM_X86_DEFAULT_VM
* is always supported.
*/
if (kvm_type == KVM_X86_DEFAULT_VM) {
return kvm_type;
}
if (!(kvm_check_extension(KVM_STATE(ms->accelerator), KVM_CAP_VM_TYPES) & BIT(kvm_type))) {
error_report("vm-type %s not supported by KVM", vm_type_name[kvm_type]);
exit(1);
}
return kvm_type;
}
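
For context, a hedged sketch (not this hunk; the function and fd names are illustrative) of where the returned type goes: QEMU's generic KVM init passes it as the type argument of KVM_CREATE_VM, which is why the KVM_CAP_VM_TYPES bitmask check above matters.

/* Hypothetical caller sketch; error handling elided. */
#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int create_vm_fd(int kvm_type)
{
    int kvm_fd = open("/dev/kvm", O_RDWR);
    /* kvm_type as returned by kvm_get_vm_type() above. */
    return ioctl(kvm_fd, KVM_CREATE_VM, (unsigned long)kvm_type);
}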
bool kvm_has_smm(void)
{
return kvm_vm_check_extension(kvm_state, KVM_CAP_X86_SMM);
}
bool kvm_has_pam(void)
{
return true;
}
bool kvm_has_adjust_clock_stable(void)
{
int ret = kvm_check_extension(kvm_state, KVM_CAP_ADJUST_CLOCK);
@@ -247,7 +284,7 @@ void kvm_synchronize_all_tsc(void)
{
CPUState *cpu;
if (kvm_enabled() && !is_tdx_vm()) {
CPU_FOREACH(cpu) {
run_on_cpu(cpu, do_kvm_synchronize_tsc, RUN_ON_CPU_NULL);
}
@@ -279,6 +316,20 @@ static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max)
return cpuid;
}
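/* Debug helper: dump every entry of a kvm_cpuid2 list to stdout. */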
static void pr_cpuid_entries(const char *title, struct kvm_cpuid2 *cpuids)
{
int i, nr = cpuids->nent;
struct kvm_cpuid_entry2 *e;
printf("%s\n", title);
for (i = 0; i < nr; i++) {
e = cpuids->entries + i;
printf("CPUID: 0x%-8x 0x%2x: 0x%08x 0x%08x 0x%08x 0x%08x\n",
e->function, e->index, e->eax, e->ebx, e->ecx, e->edx);
}
printf("\n");
}
/* Run KVM_GET_SUPPORTED_CPUID ioctl(), allocating a buffer large enough
* for all entries.
*/
@@ -294,6 +345,7 @@ static struct kvm_cpuid2 *get_supported_cpuid(KVMState *s)
max *= 2;
}
cpuid_cache = cpuid;
pr_cpuid_entries("KVM returned KVM_GET_SUPPORTED_CPUID:", cpuid);
return cpuid;
}
@@ -490,6 +542,10 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
ret |= 1U << KVM_HINTS_REALTIME;
}
if (is_tdx_vm()) {
tdx_get_supported_cpuid(function, index, reg, &ret);
}
return ret;
}
@@ -759,6 +815,15 @@ static int kvm_arch_set_tsc_khz(CPUState *cs)
int r, cur_freq;
bool set_ioctl = false;
/*
* The TSC of a TD vcpu is immutable; it cannot be set/changed via the
* vcpu-scope KVM_SET_TSC_KHZ ioctl, and can only be initialized via the
* VM-scope KVM_SET_TSC_KHZ before ioctl KVM_TDX_INIT_VM in tdx_pre_create_vcpu()
*/
if (is_tdx_vm()) {
return 0;
}
if (!env->tsc_khz) {
return 0;
}
@@ -1655,8 +1720,6 @@ static int hyperv_init_vcpu(X86CPU *cpu)
static Error *invtsc_mig_blocker;
static void kvm_init_xsave(CPUX86State *env)
{
if (has_xsave2) {
@@ -1699,6 +1762,236 @@ static void kvm_init_nested_state(CPUX86State *env)
}
}
uint32_t kvm_x86_arch_cpuid(CPUX86State *env, struct kvm_cpuid_entry2 *entries,
uint32_t cpuid_i)
{
uint32_t limit, i, j;
uint32_t unused;
struct kvm_cpuid_entry2 *c;
cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused);
for (i = 0; i <= limit; i++) {
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
fprintf(stderr, "unsupported level value: 0x%x\n", limit);
abort();
}
c = &entries[cpuid_i++];
switch (i) {
case 2: {
/* Keep reading function 2 till all the input is received */
int times;
c->function = i;
c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC |
KVM_CPUID_FLAG_STATE_READ_NEXT;
cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
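/* The low byte of leaf 2's EAX gives the number of CPUID(2) iterations needed. */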
times = c->eax & 0xff;
for (j = 1; j < times; ++j) {
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
fprintf(stderr, "cpuid_data is full, no space for "
"cpuid(eax:2):eax & 0xf = 0x%x\n", times);
abort();
}
c = &entries[cpuid_i++];
c->function = i;
c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC;
cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
}
break;
}
case 0x1f:
if (env->nr_dies < 2) {
cpuid_i--;
break;
}
/* fallthrough */
case 4:
case 0xb:
case 0xd:
for (j = 0; ; j++) {
if (i == 0xd && j == 64) {
break;
}
c->function = i;
c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
c->index = j;
cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
if (i == 4 && c->eax == 0) {
break;
}
if (i == 0xb && !(c->ecx & 0xff00)) {
break;
}
if (i == 0x1f && !(c->ecx & 0xff00)) {
break;
}
if (i == 0xd && c->eax == 0) {
continue;
}
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
fprintf(stderr, "cpuid_data is full, no space for "
"cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
abort();
}
c = &entries[cpuid_i++];
}
break;
case 0x7:
case 0x12:
for (j = 0; ; j++) {
c->function = i;
c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
c->index = j;
cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
if (j > 1 && (c->eax & 0xf) != 1) {
break;
}
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
fprintf(stderr, "cpuid_data is full, no space for "
"cpuid(eax:0x12,ecx:0x%x)\n", j);
abort();
}
c = &entries[cpuid_i++];
}
break;
case 0x14:
case 0x1d:
case 0x1e: {
uint32_t times;
c->function = i;
c->index = 0;
c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
times = c->eax;
for (j = 1; j <= times; ++j) {
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
fprintf(stderr, "cpuid_data is full, no space for "
"cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
abort();
}
c = &entries[cpuid_i++];
c->function = i;
c->index = j;
c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
}
break;
}
default:
c->function = i;
c->flags = 0;
cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
if (!c->eax && !c->ebx && !c->ecx && !c->edx) {
/*
* KVM already returns all zeroes if a CPUID entry is missing,
* so we can omit it and avoid hitting KVM's 80-entry limit.
*/
cpuid_i--;
}
break;
}
}
if (limit >= 0x0a) {
uint32_t eax, edx;
cpu_x86_cpuid(env, 0x0a, 0, &eax, &unused, &unused, &edx);
has_architectural_pmu_version = eax & 0xff;
if (has_architectural_pmu_version > 0) {
num_architectural_pmu_gp_counters = (eax & 0xff00) >> 8;
/* Shouldn't be more than 32, since that's the number of bits
* available in EBX to tell us _which_ counters are available.
* Play it safe.
*/
if (num_architectural_pmu_gp_counters > MAX_GP_COUNTERS) {
num_architectural_pmu_gp_counters = MAX_GP_COUNTERS;
}
if (has_architectural_pmu_version > 1) {
num_architectural_pmu_fixed_counters = edx & 0x1f;
if (num_architectural_pmu_fixed_counters > MAX_FIXED_COUNTERS) {
num_architectural_pmu_fixed_counters = MAX_FIXED_COUNTERS;
}
}
}
}
cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused);
for (i = 0x80000000; i <= limit; i++) {
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
fprintf(stderr, "unsupported xlevel value: 0x%x\n", limit);
abort();
}
c = &entries[cpuid_i++];
switch (i) {
case 0x8000001d:
/* Query for all AMD cache information leaves */
for (j = 0; ; j++) {
c->function = i;
c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
c->index = j;
cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
if (c->eax == 0) {
break;
}
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
fprintf(stderr, "cpuid_data is full, no space for "
"cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
abort();
}
c = &entries[cpuid_i++];
}
break;
default:
c->function = i;
c->flags = 0;
cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
if (!c->eax && !c->ebx && !c->ecx && !c->edx) {
/*
* KVM already returns all zeroes if a CPUID entry is missing,
* so we can omit it and avoid hitting KVM's 80-entry limit.
*/
cpuid_i--;
}
break;
}
}
/* Call Centaur's CPUID instructions if they are supported. */
if (env->cpuid_xlevel2 > 0) {
cpu_x86_cpuid(env, 0xC0000000, 0, &limit, &unused, &unused, &unused);
for (i = 0xC0000000; i <= limit; i++) {
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
fprintf(stderr, "unsupported xlevel2 value: 0x%x\n", limit);
abort();
}
c = &entries[cpuid_i++];
c->function = i;
c->flags = 0;
cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
}
}
return cpuid_i;
}
int kvm_arch_init_vcpu(CPUState *cs)
{
struct {
@@ -1715,8 +2008,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
X86CPU *cpu = X86_CPU(cs);
CPUX86State *env = &cpu->env;
uint32_t cpuid_i;
struct kvm_cpuid_entry2 *c;
uint32_t signature[3];
int kvm_base = KVM_CPUID_SIGNATURE;
@@ -1869,8 +2161,6 @@ int kvm_arch_init_vcpu(CPUState *cs)
c->edx = env->features[FEAT_KVM_HINTS];
}
if (cpu->kvm_pv_enforce_cpuid) {
r = kvm_vcpu_enable_cap(cs, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, 0, 1);
if (r < 0) {
@@ -1881,227 +2171,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
}
}
cpuid_i = kvm_x86_arch_cpuid(env, cpuid_data.entries, cpuid_i);
cpuid_data.cpuid.nent = cpuid_i;
if (((env->cpuid_version >> 8)&0xF) >= 6
@@ -2193,6 +2263,9 @@ int kvm_arch_init_vcpu(CPUState *cs)
cpuid_data.cpuid.nent = cpuid_i;
cpuid_data.cpuid.padding = 0;
pr_cpuid_entries("constructed CPUID for vcpu:", &cpuid_data.cpuid);
r = kvm_vcpu_ioctl(cs, KVM_SET_CPUID2, &cpuid_data);
if (r) {
goto fail;
@@ -2227,6 +2300,15 @@ int kvm_arch_init_vcpu(CPUState *cs)
return r;
}
int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp)
{
if (is_tdx_vm()) {
return tdx_pre_create_vcpu(cpu, errp);
}
return 0;
}
int kvm_arch_destroy_vcpu(CPUState *cs)
{
X86CPU *cpu = X86_CPU(cs);
@@ -2514,6 +2596,17 @@ int kvm_arch_get_default_type(MachineState *ms)
return 0;
}
static int kvm_confidential_guest_init(MachineState *ms, Error **errp)
{
if (object_dynamic_cast(OBJECT(ms->cgs), TYPE_SEV_GUEST)) {
return sev_kvm_init(ms->cgs, errp);
} else if (object_dynamic_cast(OBJECT(ms->cgs), TYPE_TDX_GUEST)) {
return tdx_kvm_init(ms, errp);
}
return 0;
}
int kvm_arch_init(MachineState *ms, KVMState *s)
{
uint64_t identity_base = 0xfffbc000;
@@ -2523,18 +2616,12 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
Error *local_err = NULL;
/*
* Initialize confidential guest (SEV/TDX) context, if required
*
* If no memory encryption is requested (ms->cgs == NULL) this is
* a no-op.
*
* It's also a no-op if a non-SEV/non-TDX confidential guest support
* mechanism is selected.
*/
ret = kvm_confidential_guest_init(ms, &local_err);
if (ret < 0) {
error_report_err(local_err);
return ret;
@@ -2997,6 +3084,11 @@ void kvm_put_apicbase(X86CPU *cpu, uint64_t value)
{
int ret;
/* TODO: Allow accessing guest state for debug TDs. */
if (is_tdx_vm()) {
return;
}
ret = kvm_put_one_msr(cpu, MSR_IA32_APICBASE, value);
assert(ret == 1);
}
@@ -3215,32 +3307,34 @@ static void kvm_init_msrs(X86CPU *cpu)
CPUX86State *env = &cpu->env;
kvm_msr_buf_reset(cpu);
if (!is_tdx_vm()) {
if (has_msr_arch_capabs) {
kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES,
env->features[FEAT_ARCH_CAPABILITIES]);
}
if (has_msr_core_capabs) {
kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY,
env->features[FEAT_CORE_CAPABILITY]);
}
if (has_msr_perf_capabs && cpu->enable_pmu) {
kvm_msr_entry_add_perf(cpu, env->features);
}
/*
* Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but
* all kernels with MSR features should have them.
*/
if (kvm_feature_msrs && cpu_has_vmx(env)) {
kvm_msr_entry_add_vmx(cpu, env->features);
}
}
if (has_msr_ucode_rev) {
kvm_msr_entry_add(cpu, MSR_IA32_UCODE_REV, cpu->ucode_rev);
}
assert(kvm_buf_set_msrs(cpu) == 0);
}
@@ -4550,6 +4644,11 @@ int kvm_arch_put_registers(CPUState *cpu, int level)
assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
/* TODO: Allow accessing guest state for debug TDs. */
if (is_tdx_vm()) {
return 0;
}
/*
* Put MSR_IA32_FEATURE_CONTROL first, this ensures the VM gets out of VMX
* root operation upon vCPU reset. kvm_put_msr_feature_control() should also
@@ -4650,6 +4749,12 @@ int kvm_arch_get_registers(CPUState *cs)
if (ret < 0) {
goto out;
}
/* TODO: Allow accessing guest state for debug TDs. */
if (is_tdx_vm()) {
return 0;
}
ret = kvm_getput_regs(cpu, 0);
if (ret < 0) {
goto out;
@@ -5350,6 +5455,15 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
ret = kvm_xen_handle_exit(cpu, &run->xen);
break;
#endif
case KVM_EXIT_TDX:
if (!is_tdx_vm()) {
error_report("KVM: get KVM_EXIT_TDX for a non-TDX VM.");
ret = -1;
break;
}
tdx_handle_exit(cpu, &run->tdx);
ret = 0;
break;
default:
fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
ret = -1;
@@ -5602,7 +5716,7 @@ bool kvm_has_waitpkg(void)
bool kvm_arch_cpu_check_are_resettable(void)
{
return !sev_es_enabled() && !is_tdx_vm();
}
#define ARCH_REQ_XCOMP_GUEST_PERM 0x1025


@@ -13,6 +13,8 @@
#include "sysemu/kvm.h"
#define KVM_MAX_CPUID_ENTRIES 100
#ifdef CONFIG_KVM
#define kvm_pit_in_kernel() \
@@ -22,6 +24,9 @@
#define kvm_ioapic_in_kernel() \
(kvm_irqchip_in_kernel() && !kvm_irqchip_is_split())
uint32_t kvm_x86_arch_cpuid(CPUX86State *env, struct kvm_cpuid_entry2 *entries,
uint32_t cpuid_i);
#else
#define kvm_pit_in_kernel() 0
@@ -33,10 +38,12 @@
bool kvm_has_smm(void);
bool kvm_enable_x2apic(void);
bool kvm_hv_vpindex_settable(void);
bool kvm_has_pam(void);
bool kvm_enable_sgx_provisioning(KVMState *s);
bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp);
int kvm_get_vm_type(MachineState *ms, const char *vm_type);
void kvm_arch_reset_vcpu(X86CPU *cs);
void kvm_arch_after_reset_vcpu(X86CPU *cpu);
void kvm_arch_do_init_vcpu(X86CPU *cs);

View File

@@ -9,6 +9,8 @@ i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files('xen-emu.c'))
i386_kvm_ss.add(when: 'CONFIG_SEV', if_false: files('sev-stub.c'))
i386_kvm_ss.add(when: 'CONFIG_TDX', if_true: files('tdx.c'), if_false: files('tdx-stub.c'))
i386_system_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c'), if_false: files('hyperv-stub.c'))
i386_system_ss.add_all(when: 'CONFIG_KVM', if_true: i386_kvm_ss)

Some files were not shown because too many files have changed in this diff Show More