Compare commits

...

69 Commits

Author SHA1 Message Date
Xiaoyao Li
97d7eee445 docs: Add TDX documentation
Add docs/system/i386/tdx.rst for TDX support, and add tdx in
confidential-guest-support.rst

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
---
Changes in v5:
 - Add TD attestation section and update the QEMU parameter;

Changes since v1:
 - Add prerequisite of private gmem;
 - update example command to launch TD;

Changes since RFC v4:
 - add the restriction that kernel-irqchip must be split
2024-02-29 00:31:58 -05:00
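For orientation, a minimal launch sketch consistent with the restrictions in
this series (kernel-irqchip must be split; the TDVF path and extra options
are illustrative placeholders, not the canonical command from tdx.rst):

  qemu-system-x86_64 -accel kvm \
      -machine q35,kernel-irqchip=split,confidential-guest-support=tdx0 \
      -object tdx-guest,id=tdx0 \
      -bios /path/to/TDVF.fd \
      -nographic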
Sean Christopherson
2f2df9a3bc i386/tdx: Don't get/put guest state for TDX VMs
Don't get/put state of TDX VMs since accessing/mutating guest state of
production TDs is not supported.

Note, it will be allowed for a debug TD. Corresponding support will be
introduced when debug TD support is implemented in the future.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
2024-02-29 00:31:58 -05:00
Xiaoyao Li
dc149a2dab i386/tdx: Skip kvm_put_apicbase() for TDs
KVM doesn't allow writing to MSR_IA32_APICBASE for TDs.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
2024-02-29 00:31:58 -05:00
Xiaoyao Li
dd3280e555 i386/tdx: Only configure MSR_IA32_UCODE_REV in kvm_init_msrs() for TDs
Of the MSRs configured in kvm_init_msrs(), only MSR_IA32_UCODE_REV can be
set by the VMM for TDs; the features enumerated/controlled by the other
MSRs are not under VMM control.

Only configure MSR_IA32_UCODE_REV for TDs.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
2024-02-29 00:31:58 -05:00
Isaku Yamahata
48745be408 i386/tdx: Don't synchronize guest tsc for TDs
The TSC of TDs is not accessible and KVM doesn't allow access to
MSR_IA32_TSC for TDs. To avoid the assert() in kvm_get_tsc(), make
kvm_synchronize_all_tsc() a no-op for TDs.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Reviewed-by: Connor Kuehl <ckuehl@redhat.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
2024-02-29 00:31:58 -05:00
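A minimal sketch of what the no-op guard could look like (is_tdx_vm() is the
helper introduced later in this series; do_kvm_synchronize_tsc() is the
existing per-vcpu sync worker in target/i386/kvm/kvm.c):

  void kvm_synchronize_all_tsc(void)
  {
      CPUState *cpu;

      /* TSC of TDs is inaccessible; skip the sync to avoid the assert() */
      if (kvm_enabled() && !is_tdx_vm()) {
          CPU_FOREACH(cpu) {
              run_on_cpu(cpu, do_kvm_synchronize_tsc, RUN_ON_CPU_NULL);
          }
      }
  }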
Isaku Yamahata
c5fe579baa hw/i386: add option to forcibly report edge trigger in acpi tables
When level-triggered interrupts aren't supported on the x86 platform,
forcibly report edge trigger in the ACPI tables.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
2024-02-29 00:31:58 -05:00
Xiaoyao Li
a1827de88d hw/i386: add eoi_intercept_unsupported member to X86MachineState
Add a new bool member, eoi_intercept_unsupported, to X86MachineState
with default value false. Set it to true for TDX VMs.

The inability to intercept EOI makes it impossible to re-inject a
level-triggered interrupt while its level is still held active, which
affects interrupt controller emulation.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
2024-02-29 00:31:58 -05:00
Xiaoyao Li
5cdcac0ad8 i386/tdx: LMCE is not supported for TDX
LMCE is not supported for TDX since KVM doesn't provide emulation for
MSR_IA32_FEAT_CTL.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2024-02-29 00:31:58 -05:00
Xiaoyao Li
7887fbc910 i386/tdx: Don't allow system reset for TDX VMs
TDX CPU state is protected and thus vcpu state cannot be reset by the VMM.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
2024-02-29 00:31:58 -05:00
Xiaoyao Li
895c595bc2 i386/tdx: Disable PIC for TDX VMs
The legacy PIC (8259) cannot be supported for TDX VMs since the TDX module
doesn't allow direct interrupt injection.  Using posted interrupts
for the PIC is not a viable option as the guest BIOS/kernel will not
do EOI for PIC IRQs, i.e. will leave the vIRR bit set.

Hence disable the PIC for TDX VMs and error out if the user requests one.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
2024-02-29 00:31:58 -05:00
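A hedged sketch of how the check might look in the TDX machine-init path
(the ON_OFF_AUTO handling of x86ms->pic is an assumption for illustration):

  /* TDX cannot emulate a PIC: no direct injection and no EOI from guests */
  if (x86ms->pic == ON_OFF_AUTO_ON) {
      error_report("TDX VM doesn't support PIC");
      exit(1);
  }
  x86ms->pic = ON_OFF_AUTO_OFF;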
Xiaoyao Li
4736232354 i386/tdx: Disable SMM for TDX VMs
TDX doesn't support SMM, and the VMM cannot emulate SMM for TDX VMs
because the VMM cannot manipulate a TDX VM's memory.

Disable SMM for TDX VMs and error out if the user requests to enable it.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
2024-02-29 00:31:58 -05:00
Isaku Yamahata
56d60881a9 q35: Introduce smm_ranges property for q35-pci-host
Add a q35 property to check whether or not SMM ranges, e.g. SMRAM, TSEG,
etc... exist for the target platform.  TDX doesn't support SMM and doesn't
play nice with QEMU modifying related guest memory ranges.

Signed-off-by: Isaku Yamahata <isaku.yamahata@linux.intel.com>
Co-developed-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2024-02-29 00:31:58 -05:00
Isaku Yamahata
83568ce7cf pci-host/q35: Move PAM initialization above SMRAM initialization
In mch_realize(), process PAM initialization before SMRAM initialization so
that a later patch can skip all the SMRAM-related setup with a single check.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2024-02-29 00:31:58 -05:00
Xiaoyao Li
c19eb0bf28 i386/tdx: Wire TDX_REPORT_FATAL_ERROR with GuestPanic facility
Integrate TDX's TDX_REPORT_FATAL_ERROR into the QEMU GuestPanic facility.

Originated-from: Isaku Yamahata <isaku.yamahata@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
---
Changes in v5:
- mention the additional error information in gpa when it is present;
- refine the documentation; (Markus)

Changes in v4:
- refine the documentation; (Markus)

Changes in v3:
- Add documentation of the new type and struct; (Daniel)
- refine the error message handling; (Daniel)
2024-02-29 00:31:58 -05:00
Xiaoyao Li
cf78496f76 i386/tdx: Handle TDG.VP.VMCALL<REPORT_FATAL_ERROR>
A TD guest can use TDG.VP.VMCALL<REPORT_FATAL_ERROR> to request termination
with the error message encoded in GPRs.

Parse and print the error message, and terminate the TD guest in the
handler.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2024-02-29 00:31:58 -05:00
Isaku Yamahata
ac4bf9e0c9 i386/tdx: handle TDG.VP.VMCALL<MapGPA> hypercall
MapGPA is a hypercall to convert a GPA between private and shared. As the
conversion function is already implemented as kvm_convert_memory(), wire it
to the TDX hypercall exit.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2024-02-29 00:31:58 -05:00
Isaku Yamahata
1f0e164403 i386/tdx: handle TDG.VP.VMCALL<GetQuote>
Add property "quote-generation-socket" to tdx-guest, which is a property
of type SocketAddress to specify Quote Generation Service(QGS).

On a GetQuote request, it connects to the QGS socket, reads the request
data from shared guest memory, sends the request data to the QGS, stores
the response into shared guest memory, and finally notifies the TD guest
by interrupt.

command line example:
  qemu-system-x86_64 \
    -object '{"qom-type":"tdx-guest","id":"tdx0","quote-generation-socket":{"type": "vsock", "cid":"1","port":"1234"}}' \
    -machine confidential-guest-support=tdx0

Note, the example above uses a vsock-type socket because the QGS we used
implements vsock. Other types, such as a UNIX socket, are possible,
depending on the implementation of the QGS.

To avoid hanging on an unresponsive QGS server, set up a timer for the
transaction. On timeout, treat it as an error and interrupt the guest. The
threshold is currently defined as 30s, which may be changed if it proves
inappropriate.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Codeveloped-by: Chenyi Qiang <chenyi.qiang@intel.com>
Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
Codeveloped-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
---
Changes in v5:
- add more description of the quote-generation-socket property;

Changes in v4:
- merge next patch "i386/tdx: setup a timer for the qio channel";

Changes in v3:
- rename property "quote-generation-service" to "quote-generation-socket";
- change the type of "quote-generation-socket" from str to
  SocketAddress;
- squash next patch into this one;
2024-02-29 00:31:58 -05:00
Isaku Yamahata
b229d5fbee i386/tdx: handle TDG.VP.VMCALL<SetupEventNotifyInterrupt>
For SetupEventNotifyInterrupt, record the interrupt vector and the APIC ID
of the vcpu that received this TDVMCALL.

QEMU can later inject an interrupt with the given vector to the specific
vcpu that issued SetupEventNotifyInterrupt.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2024-02-29 00:31:58 -05:00
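One plausible shape for the recorded state (member names are illustrative
assumptions; the actual TdxGuest layout may differ):

  typedef struct TdxGuest {
      /* ... existing members ... */

      /* Recorded by TDG.VP.VMCALL<SetupEventNotifyInterrupt>; consumed when
       * QEMU later needs to notify the TD, e.g. on GetQuote completion. */
      int event_notify_interrupt;    /* requested vector, -1 if unset */
      uint32_t event_notify_apicid;  /* APIC ID of the requesting vcpu */
  } TdxGuest;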
Xiaoyao Li
74fd273f24 i386/tdx: Finalize TDX VM
Invoke KVM_TDX_FINALIZE_VM to finalize the TD's measurement and make
the TD vCPUs runnable once machine initialization is complete.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
2024-02-29 00:31:58 -05:00
Xiaoyao Li
a17b2fc06d i386/tdx: Call KVM_TDX_INIT_VCPU to initialize TDX vcpu
A TDX vcpu needs to be initialized by SEAMCALL(TDH.VP.INIT), and KVM
provides the vcpu-level IOCTL KVM_TDX_INIT_VCPU for it.

KVM_TDX_INIT_VCPU needs the address of the HOB as input. Invoke it for
each vcpu after the HOB list is created.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
2024-02-29 00:31:58 -05:00
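A minimal sketch of the per-vcpu call (tdx_vcpu_ioctl() is the vcpu-scope
helper this series introduces; hob_addr is a hypothetical variable holding
the TD HOB address):

  CPUState *cpu;
  int r;

  /* after the TD HOB list has been created */
  CPU_FOREACH(cpu) {
      /* KVM_TDX_INIT_VCPU takes the HOB address as its input data */
      r = tdx_vcpu_ioctl(cpu, KVM_TDX_INIT_VCPU, 0, (void *)hob_addr);
      if (r < 0) {
          error_report("KVM_TDX_INIT_VCPU failed: %s", strerror(-r));
          exit(1);
      }
  }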
Isaku Yamahata
4b15af10b9 i386/tdx: Populate TDVF private memory via KVM_MEMORY_MAPPING
The TDVF firmware (CODE and VARS) needs to be copied to the TD's private
memory, as do the TD HOB and TEMP memory.

If a TDVF section has TDVF_SECTION_ATTRIBUTES_MR_EXTEND set in its flags,
call KVM_TDX_EXTEND_MEMORY to extend the measurement.

After populating the TDVF memory, the original image located in the shared
ramblock can be discarded.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>

---
Changes in v1:
  - rename variable @metadata to @flags
2024-02-29 00:31:58 -05:00
Xiaoyao Li
db8c6a73df i386/tdx: Setup the TD HOB list
The TD HOB list is used to pass information from the VMM to the TDVF. The
TD HOB must include the PHIT HOB and the Resource Descriptor HOB. More
details can be found in the TDVF specification and the PI specification.

Build the TD HOB in TDX's machine_init_done callback.

Co-developed-by: Isaku Yamahata <isaku.yamahata@intel.com>
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Co-developed-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>

---
Changes in v1:
  - drop the code of adding mmio resources since OVMF prepares all the
    MMIO hob itself.
2024-02-29 00:31:58 -05:00
Xiaoyao Li
89daa71741 headers: Add definitions from UEFI spec for volumes, resources, etc...
Add UEFI definitions for literals, enums, structs, GUIDs, etc... that
will be used by TDX to build the UEFI Hand-Off Block (HOB) that is passed
to the Trusted Domain Virtual Firmware (TDVF).

All values come from the UEFI specification [1], PI spec [2] and TDVF
design guide[3].

[1] UEFI Specification v2.10 https://uefi.org/sites/default/files/resources/UEFI_Spec_2_10_Aug29.pdf
[2] UEFI PI spec v1.8 https://uefi.org/sites/default/files/resources/UEFI_PI_Spec_1_8_March3.pdf
[3] https://software.intel.com/content/dam/develop/external/us/en/documents/tdx-virtual-firmware-design-guide-rev-1.pdf

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
2024-02-29 00:31:58 -05:00
Xiaoyao Li
3e682e2ecd i386/tdx: Track RAM entries for TDX VM
The RAM of TDX VM can be classified into two types:

 - TDX_RAM_UNACCEPTED: default type of TDX memory, which needs to be
   accepted by TDX guest before it can be used and will be all-zeros
   after being accepted.

 - TDX_RAM_ADDED: RAM that is ADD'ed to the TD guest before it runs and
   can be used directly, e.g., the TD HOB and TEMP MEM needed by TDVF.

Maintain TdxRamEntries[], which grabs the initial RAM info from the e820
table and marks each RAM range with the default type TDX_RAM_UNACCEPTED.

Then turn the ranges of the TD HOB and TEMP MEM into TDX_RAM_ADDED, since
these ranges will be ADD'ed before the TD runs and need not be accepted at
runtime.

The TdxRamEntries[] are later used to set up the memory TD resource HOB
that passes the memory info from QEMU to TDVF.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
---
Changes in v3:
- use enum TdxRamType in struct TdxRamEntry; (Isaku)
- Fix the indentation; (Daniel)

Changes in v1:
  - simplify the algorithm of tdx_accept_ram_range() (Suggested-by: Gerd Hoffman)
    (1) Change the existing entry to cover the accepted ram range.
    (2) If there is room before the accepted ram range add a
	TDX_RAM_UNACCEPTED entry for that.
    (3) If there is room after the accepted ram range add a
	TDX_RAM_UNACCEPTED entry for that.
2024-02-29 00:31:58 -05:00
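A sketch of the simplified tdx_accept_ram_range() algorithm described in
the changelog above (tdx_find_ram_range() and tdx_add_ram_entry() are
assumed helpers; the real code may differ):

  static void tdx_accept_ram_range(uint64_t address, uint64_t length)
  {
      TdxRamEntry *e = tdx_find_ram_range(address, length);
      uint64_t head_start = e->address;
      uint64_t tail_start = address + length;
      uint64_t tail_length = e->address + e->length - tail_start;

      /* (1) Change the existing entry to cover the accepted ram range. */
      e->address = address;
      e->length = length;
      e->type = TDX_RAM_ADDED;

      /* (2) If there is room before the accepted range, keep it unaccepted. */
      if (head_start < address) {
          tdx_add_ram_entry(head_start, address - head_start,
                            TDX_RAM_UNACCEPTED);
      }

      /* (3) Same for any room after the accepted range. */
      if (tail_length > 0) {
          tdx_add_ram_entry(tail_start, tail_length, TDX_RAM_UNACCEPTED);
      }
  }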
Xiaoyao Li
86303dc132 i386/tdx: Track mem_ptr for each firmware entry of TDVF
For each TDVF section, QEMU needs to copy the content to guest
private memory via the KVM API (KVM_TDX_INIT_MEM_REGION).

Introduce a field @mem_ptr in TdxFirmwareEntry to track the memory
pointer of each TDVF section, so that QEMU can add/copy them to guest
private memory later.

TDVF sections can be classified into two groups:
 - The firmware itself, e.g., TDVF BFV and CFV, which is located
   separately from guest RAM. Its memory pointer is the bios pointer.

 - Sections located in guest RAM, e.g., TEMP_MEM and TD_HOB.
   mmap a new memory range for them.

Register a machine_init_done callback to do the work.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
2024-02-29 00:31:58 -05:00
Xiaoyao Li
13cc418776 i386/tdx: Don't initialize pc.rom for TDX VMs
For TDX, the addresses below 1MB are entirely general RAM. There is no
need to initialize the pc.rom memory region for TDs.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
---
This is more of a workaround for the issue that, for the q35 machine type,
the real memslot update (which requires memslot deletion) for pc.rom
happens after tdx_init_memory_region. That causes the private memory
ADD'ed earlier to get lost. I haven't worked out a good solution to the
ordering issue, so just skip the pc.rom setup to avoid the memslot
deletion.
2024-02-29 00:31:58 -05:00
Xiaoyao Li
0ca018b4ae i386/tdx: Skip BIOS shadowing setup
TDX doesn't support mapping different GPAs to the same private memory, so
aliasing the top 128KB of the BIOS as isa-bios is not supported.

On the other hand, a TDX guest cannot enter real mode, so it works fine
without isa-bios.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
---
Changes in v1:
 - update commit message and comment to clarify
2024-02-29 00:31:58 -05:00
Xiaoyao Li
3993dcd46f i386/tdx: Parse TDVF metadata for TDX VM
After TDVF is loaded into the bios MemoryRegion, QEMU needs to parse the
TDVF metadata.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
2024-02-29 00:31:58 -05:00
Isaku Yamahata
f55358924b i386/tdvf: Introduce function to parse TDVF metadata
A TDX VM needs to boot with its specialized firmware, the Trusted Domain
Virtual Firmware (TDVF). QEMU needs to parse TDVF and map it into TD
guest memory prior to running the TDX VM.

The TDVF metadata in the TDVF image describes the structure of the
firmware. QEMU refers to it to set up memory for TDVF. Introduce the
function tdvf_parse_metadata() to parse the metadata from the TDVF image
and store the info of each TDVF section.

The TDX metadata is located by a TDX metadata offset block, which is a
GUID-ed structure. The data portion of the GUID structure contains
only a 4-byte field that is the offset of the TDX metadata from the end
of the firmware file.

Select X86_FW_OVMF when TDX is enabled to leverage the existing functions
for parsing and searching OVMF's GUID-ed structures.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Co-developed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>

---
Changes in v1:
 - rename tdvf_parse_section_entry() to
   tdvf_parse_and_check_section_entry()
Changes in RFC v4:
 - rename TDX_METADATA_GUID to TDX_METADATA_OFFSET_GUID
2024-02-29 00:31:58 -05:00
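A sketch of locating the metadata through the offset block, reusing QEMU's
existing OVMF GUID-ed structure lookup (flash_ptr, flash_size and
TdvfMetadata are illustrative stand-ins for the patch's actual variables):

  uint8_t *data;
  uint32_t offset = 0;
  TdvfMetadata *metadata = NULL;

  /* The data portion of TDX_METADATA_OFFSET_GUID holds a 4-byte offset,
   * counted backwards from the end of the firmware file. */
  if (pc_system_ovmf_table_find(TDX_METADATA_OFFSET_GUID, &data, NULL)) {
      offset = flash_size - le32_to_cpu(*(uint32_t *)data);
      metadata = (TdvfMetadata *)(flash_ptr + offset);
  }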
Chao Peng
b2caa23cb6 i386/tdx: load TDVF for TD guest
TDVF (OVMF) needs to run in private memory for a TD guest. TDX cannot
support the pflash device since it doesn't support read-only private
memory. Thus load TDVF (OVMF) with the -bios option for TDs.

Use memory_region_init_ram_guest_memfd() to allocate the MemoryRegion
for TDVF because it needs to be located in private memory.

Also store the MemoryRegion pointer of TDVF, since its shared ramblock
can be discarded after it gets copied to the private ramblock.

Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
Co-developed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2024-02-29 00:31:58 -05:00
Xiaoyao Li
11df3f7c04 memory: Introduce memory_region_init_ram_guest_memfd()
Introduce memory_region_init_ram_guest_memfd() to allocate a private
guest memfd at MemoryRegion initialization. It's for the use case of
TDVF, which must be private in the TDX case.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
---
Changes in v5:
- drop memory_region_set_default_private() because this function is
  dropped in this v5 series;
2024-02-29 00:31:58 -05:00
Isaku Yamahata
340f514448 kvm/tdx: Ignore memory conversion to shared of unassigned region
TDX requires vMMIO regions to be shared.  For KVM, an MMIO region is a
region that no kvm memslot is assigned to (except for in-kernel emulation).
QEMU has a memory region for vMMIO at each device level.

While OVMF issues MapGPA(to-shared) conservatively on the 32bit PCI MMIO
region, QEMU doesn't find the corresponding vMMIO region because the call
happens before PCI device allocation, and memory_region_find() finds the
device region, not the PCI bus region.  It's safe to ignore
MapGPA(to-shared) because when the guest accesses those regions it uses a
GPA with the shared bit set for vMMIO.  Ignore memory conversion requests
to shared for unassigned regions and return success.  Otherwise OVMF gets
confused and panics there.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2024-02-29 00:31:58 -05:00
Isaku Yamahata
e49e612f39 kvm/tdx: Don't complain when converting vMMIO region to shared
Because vMMIO regions need to be shared, a guest TD may explicitly
convert such regions from private to shared.  Don't complain about such
conversions.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2024-02-29 00:31:57 -05:00
Xiaoyao Li
24593b3808 i386/tdx: Set kvm_readonly_mem_enabled to false for TDX VM
TDX only supports readonly for shared memory, not for private memory.

From QEMU's point of view, it has no idea whether a memslot is used as
shared memory or private. Thus just set kvm_readonly_mem_enabled to false
for TDX VMs for simplicity.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
2024-02-29 00:31:57 -05:00
Xiaoyao Li
800c010a8e i386/tdx: Implement user specified tsc frequency
Reuse "-cpu,tsc-frequency=" to get user wanted tsc frequency and call VM
scope VM_SET_TSC_KHZ to set the tsc frequency of TD before KVM_TDX_INIT_VM.

Besides, sanity check the tsc frequency to be in the legal range and
legal granularity (required by TDX module).

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
---
Changes in v3:
- use @errp to report error info; (Daniel)

Changes in v1:
- Use the VM-scope VM_SET_TSC_KHZ to set the TSC frequency of the TD, since
  the KVM side dropped the @tsc_khz field in struct kvm_tdx_init_vm
2024-02-29 00:31:57 -05:00
Isaku Yamahata
7be7fc35fd i386/tdx: Support user configurable mrconfigid/mrowner/mrownerconfig
Three sha384 hash values of a TD, mrconfigid, mrowner and mrownerconfig,
can be provided for TDX attestation. Their detailed meaning can be found
at: https://lore.kernel.org/qemu-devel/31d6dbc1-f453-4cef-ab08-4813f4e0ff92@intel.com/

Allow the user to specify those values via the properties mrconfigid,
mrowner and mrownerconfig. They are all in base64 format.

Example:
-object tdx-guest, \
  mrconfigid=ASNFZ4mrze8BI0VniavN7wEjRWeJq83vASNFZ4mrze8BI0VniavN7wEjRWeJq83v,\
  mrowner=ASNFZ4mrze8BI0VniavN7wEjRWeJq83vASNFZ4mrze8BI0VniavN7wEjRWeJq83v,\
  mrownerconfig=ASNFZ4mrze8BI0VniavN7wEjRWeJq83vASNFZ4mrze8BI0VniavN7wEjRWeJq83v

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Co-developed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>

---
Changes in v5:
 - refine the description of QAPI properties and add description of
   default value when not specified;

Changes in v4:
 - describe more of their fields in qom.json
 - free the old value before setting a new value to avoid a memory leak in
   _setter(); (Daniel)

Changes in v3:
 - use base64 encoding instead of hex-string;
2024-02-29 00:31:57 -05:00
Xiaoyao Li
2d852d9402 i386/tdx: Validate TD attributes
Validate TD attributes against tdx_caps: fixed-0 bits must be zero and
fixed-1 bits must be set.

Besides, sanity check the attribute bits that are not yet supported by
QEMU, e.g., the debug bit; it will be allowed in the future when debug
TD support lands in QEMU.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>

---
Changes in v3:
- using error_setg() for error report; (Daniel)
2024-02-29 00:31:57 -05:00
Xiaoyao Li
d2dd1f08ed i386/tdx: Disable pmu for TD guest
Current KVM doesn't support PMU for TD guests. It returns an error if a TD
is created with the PMU bit set in attributes.

Disable PMU for TD guests on the QEMU side.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2024-02-29 00:31:57 -05:00
Xiaoyao Li
4dc0aabacb i386/tdx: Wire CPU features up with attributes of TD guest
For QEMU VMs, PKS is configured via CPUID_7_0_ECX_PKS and PMU is
configured by x86cpu->enable_pmu. Reuse the existing configuration
interface for TDX VMs.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
2024-02-29 00:31:57 -05:00
Isaku Yamahata
e930ce95da i386/tdx: Make sept_ve_disable set by default
For the TDX KVM use case, a Linux guest is the major one.  It requires
sept_ve_disable to be set.  Make it the default for the main use case.  For
other use cases, it can be enabled/disabled via the QEMU command line.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
2024-02-29 00:31:57 -05:00
Xiaoyao Li
f779417259 i386/tdx: Add property sept-ve-disable for tdx-guest object
Bit 28 of the TD attributes is named SEPT_VE_DISABLE. When set to 1, it
disables the conversion of EPT violations to #VE on guest TD access of
PENDING pages.

Some guest OSes (e.g., a Linux TD guest) may require this bit to be 1,
and otherwise refuse to boot.

Add the sept-ve-disable property to the tdx-guest object, for the user to
configure this bit.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
Acked-by: Markus Armbruster <armbru@redhat.com>
---
Changes in v4:
- collect Acked-by from Markus

Changes in v3:
- update the comment of property @sept-ve-disable to make it more
  descriptive and use new format. (Daniel and Markus)
2024-02-29 00:31:57 -05:00
Xiaoyao Li
47fe7e2994 i386/tdx: Initialize TDX before creating TD vcpus
Invoke KVM_TDX_INIT_VM in kvm_arch_pre_create_vcpu(); KVM_TDX_INIT_VM
configures global TD settings, e.g. the canonical CPUID config, and must
be executed prior to creating vCPUs.

Use kvm_x86_arch_cpuid() to set up the CPUID settings for the TDX VM.

Note, this doesn't address the fact that QEMU may change the CPUID
configuration when creating vCPUs, i.e. punts on refactoring QEMU to
provide a stable CPUID config prior to kvm_arch_init().

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
Acked-by: Markus Armbruster <armbru@redhat.com>
---
Changes in v4:
- mark init_vm with g_autofree() and use QEMU_LOCK_GUARD() to eliminate
  the goto labels; (Daniel)
Changes in v3:
- Pass @errp in tdx_pre_create_vcpu() and pass error info to it. (Daniel)
2024-02-29 00:31:57 -05:00
Xiaoyao Li
57e6e2387e kvm: Introduce kvm_arch_pre_create_vcpu()
Introduce kvm_arch_pre_create_vcpu() to perform arch-dependent work prior
to creating any vcpu. This is for i386 TDX, which needs to call
KVM_TDX_INIT_VM before creating any vcpu.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
---
Changes in v3:
- pass @errp to kvm_arch_pre_create_vcpu(); (Per Daniel)
2024-02-29 00:31:57 -05:00
Sean Christopherson
89f9a3eb99 i386/kvm: Move architectural CPUID leaf generation to separate helper
Move the architectural (for lack of a better term) CPUID leaf generation
to a separate helper so that the generation code can be reused by TDX,
which needs to generate a canonical VM-scoped configuration.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2024-02-29 00:31:57 -05:00
Xiaoyao Li
fd43407abb i386/tdx: Integrate tdx_caps->attrs_fixed0/1 to tdx_cpuid_lookup
Some bits in TD attributes have corresponding CPUID feature bits. Reflect
the fixed0/1 restriction on TD attributes to their corresponding CPUID
bits in tdx_cpuid_lookup[] as well.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
---
Changes in v4:
 - reverse the meaning of tdx_caps->attr_fixed0, because a bit value of 0
   means the bit must be fixed to 0.
2024-02-29 00:31:57 -05:00
Xiaoyao Li
f59fe9c167 i386/tdx: Integrate tdx_caps->xfam_fixed0/1 into tdx_cpuid_lookup
KVM requires userspace to pass XFAM configuration via CPUID 0xD leaves.

Convert tdx_caps->xfam_fixed0/1 into corresponding
tdx_cpuid_lookup[].tdx_fixed0/1 field of CPUID 0xD leaves. Thus the
requirement can be applied naturally.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2024-02-29 00:31:57 -05:00
Xiaoyao Li
2768bbfdd4 i386/tdx: Update tdx_cpuid_lookup[].tdx_fixed0/1 by tdx_caps.cpuid_config[]
tdx_cpuid_lookup[].tdx_fixed0/1 is QEMU-maintained data which reflects
TDX restrictions regarding which bits are fixed by the TDX module.

It's retrieved from the TDX spec and static. However, TDX may evolve and
change some fixed fields to configurable in the future. Update the
tdx_cpuid_lookup[].tdx_fixed0/1 fields by removing the bits that are
reported by the TDX module as configurable. This adapts to an updated
TDX (module) automatically.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2024-02-29 00:31:57 -05:00
Xiaoyao Li
6819098716 i386/tdx: Make Intel-PT unsupported for TD guest
Because Intel-PT virtualization support has been broken in QEMU since the
Sapphire Rapids generation [1], the warning below is triggered when
launching a TD guest:

  warning: host doesn't support requested feature: CPUID.07H:EBX.intel-pt [bit 25]

Until Intel-PT is fixed in QEMU, just make Intel-PT unsupported for TD
guests, to avoid the confusing warning.

[1] https://lore.kernel.org/qemu-devel/20230531084311.3807277-1-xiaoyao.li@intel.com/

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
---
Changes in v4:
 - newly added patch;
2024-02-29 00:31:57 -05:00
Xiaoyao Li
41a55b03b3 i386/tdx: Adjust the supported CPUID based on TDX restrictions
According to the chapter "CPUID Virtualization" in the TDX module spec,
the CPUID bits of a TD can be classified into 6 types:

------------------------------------------------------------------------
1 | As configured  | configurable by VMM, independent of native value;
------------------------------------------------------------------------
2 | As configured  | configurable by VMM if the bit is supported natively,
  | (if native)    | otherwise it equals native (0);
------------------------------------------------------------------------
3 | Fixed          | fixed to 0/1;
------------------------------------------------------------------------
4 | Native         | reflects the native value;
------------------------------------------------------------------------
5 | Calculated     | calculated by the TDX module;
------------------------------------------------------------------------
6 | Inducing #VE   | gets a #VE exception;
------------------------------------------------------------------------

Note:
1. All the configurable XFAM-related features and TD-attributes-related
   features fall into type #2, and the fixed0/1 bits of XFAM and TD
   attributes fall into type #3.

2. For CPUID leaves not listed in the "CPUID Virtualization Overview"
   table in the TDX module spec, the TDX module injects #VE into TDs when
   those leaves are queried. For this case, TDs can request CPUID
   emulation from the VMM via TDVMCALL, and the values are fully
   controlled by the VMM.

Because the TDX module has its own virtualization policy on CPUID bits,
what is reported via KVM_GET_SUPPORTED_CPUID diverges from the CPUID bits
actually supported for TDs. In order to keep a consistent CPUID
configuration between the VMM and TDs, adjust the supported CPUID for TDs
based on the TDX restrictions.

Currently, only focus on the CPUID leaves recognized by QEMU's
feature_word_info[], which are indexed by a FeatureWord.

Introduce a TDX CPUID lookup table, which maintains one entry for each
FeatureWord. Each entry has the following fields:

 - tdx_fixed0/1: The bits that are fixed as 0/1;

 - depends_on_vmm_cap: The bits that are configurable from the view of the
                       TDX module, but require VMM emulation when enabled.
                       They are not supported if the VMM doesn't report
                       them as supported, so they need to be fixed up by
                       checking whether the VMM supports them.

 - inducing_ve: TD gets #VE when querying this CPUID leaf. The result is
                totally configurable by VMM.

 - supported_on_ve: Valid only when @inducing_ve is true. It represents
                    the maximum feature set that can be emulated for TDs.

By applying the TDX CPUID lookup table and the TDX capabilities reported
by the TDX module, the supported CPUID for TDs can be obtained with the
following steps:

- get the base of the VMM-supported feature set;

- if the leaf is not a FeatureWord, just return the VMM's value without
  modification;

- if the leaf is an inducing_ve type, apply the supported_on_ve mask and
  return;

- include all native bits; this covers type #2, #4, and parts of type #1
  (it also includes some unsupported bits; the following steps correct
  that);

- apply fixed0/1 to it (this covers #3 and rectifies the previous step);

- add configurable bits (this covers the other part of type #1);

- fix up the ones in vmm_fixup;

(The Calculated type is ignored since it's determined at runtime.)

Co-developed-by: Chenyi Qiang <chenyi.qiang@intel.com>
Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2024-02-29 00:31:57 -05:00
Xiaoyao Li
d64d0075c9 i386/tdx: Introduce is_tdx_vm() helper and cache tdx_guest object
Special handling for TDX VMs will be needed all around QEMU. Introduce
the is_tdx_vm() helper to query whether it's a TDX VM.

Cache the tdx_guest object so there is no need to cast from ms->cgs every
time.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Isaku Yamahata <isaku.yamahata@intel.com>
---
changes in v3:
- replace object_dynamic_cast with TDX_GUEST();
2024-02-29 00:31:57 -05:00
Xiaoyao Li
ae72789e97 i386/tdx: Get tdx_capabilities via KVM_TDX_CAPABILITIES
KVM provides TDX capabilities via the sub-command KVM_TDX_CAPABILITIES of
IOCTL(KVM_MEMORY_ENCRYPT_OP). Get the capabilities when initializing the
TDX context; they will be used to validate the user's settings later.

Since there is no interface reporting how many cpuid configs are contained
in KVM_TDX_CAPABILITIES, QEMU chooses to start with a known number and
abort when it exceeds KVM_MAX_CPUID_ENTRIES.

Besides, introduce the interfaces to invoke TDX "ioctls" at different
scopes (KVM, VM and VCPU) in preparation.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
---
Changes in v4:
- use {} to initialize struct kvm_tdx_cmd, to avoid memset();
- remove tdx_platform_ioctl() because no user;

Changes in v3:
- rename __tdx_ioctl() to tdx_ioctl_internal()
- Pass errp in get_tdx_capabilities();

changes in v2:
  - Make the error message more clear;

changes in v1:
  - start from nr_cpuid_configs = 6 for the loop;
  - stop the loop when nr_cpuid_configs exceeds KVM_MAX_CPUID_ENTRIES;
2024-02-29 00:31:57 -05:00
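A sketch of that probe loop (tdx_kvm_ioctl() is the KVM-scope helper this
commit introduces; treating -E2BIG as "buffer too small" is an assumption
for illustration):

  static struct kvm_tdx_capabilities *tdx_caps;

  static int get_tdx_capabilities(Error **errp)
  {
      struct kvm_tdx_capabilities *caps;
      int nr_cpuid_configs = 6;   /* start from a known number */
      int r;

      do {
          caps = g_malloc0(sizeof(*caps) +
                           nr_cpuid_configs * sizeof(*caps->cpuid_configs));
          caps->nr_cpuid_configs = nr_cpuid_configs;

          r = tdx_kvm_ioctl(KVM_TDX_CAPABILITIES, 0, caps);
          if (r == -E2BIG) {
              /* buffer too small: retry with more room */
              g_free(caps);
              nr_cpuid_configs *= 2;
              if (nr_cpuid_configs > KVM_MAX_CPUID_ENTRIES) {
                  error_setg(errp, "%s: KVM TDX seems broken", __func__);
                  return r;
              }
          } else if (r < 0) {
              g_free(caps);
              error_setg_errno(errp, -r, "KVM_TDX_CAPABILITIES failed");
              return r;
          }
      } while (r == -E2BIG);

      tdx_caps = caps;
      return 0;
  }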
Xiaoyao Li
416483b146 i386/tdx: Implement tdx_kvm_init() to initialize TDX VM context
Implement the TDX-specific ConfidentialGuestSupportClass::kvm_init()
callback, tdx_kvm_init().

Set ms->require_guest_memfd to true to require private guest memfd
allocation for any memory backend.

More TDX-specific initialization will be added later.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
---
Changes in v5:
 - remove Acked-by from Gerd since the patch changed to use
   ConfidentialGuestSupportClass::kvm_init();
2024-02-29 00:31:57 -05:00
Xiaoyao Li
9dc75f6117 target/i386: Implement mc->kvm_type() to get VM type
A TDX VM requires the VM type KVM_X86_TDX_VM to be passed to
kvm_ioctl(KVM_CREATE_VM). Hence implement mc->kvm_type() for the i386
architecture.

If a tdx-guest object is specified as confidential-guest-support, like,

  qemu -machine ...,confidential-guest-support=tdx0 \
       -object tdx-guest,id=tdx0,...

the VM type is parsed as KVM_X86_TDX_VM. Otherwise, it's KVM_X86_DEFAULT_VM.

Also store the vm_type in MachineState for other code to query what the
VM type is.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
---
Changes in v4:
 - fix the build error of kvm_get_vm_type() when --disable-kvm;
2024-02-29 00:31:57 -05:00
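A plausible shape of the hook (kvm_get_vm_type() is the helper named in
the changelog; TYPE_TDX_GUEST is the assumed QOM type name of the
tdx-guest object):

  static int kvm_get_vm_type(MachineState *ms)
  {
      int kvm_type = KVM_X86_DEFAULT_VM;

      if (ms->cgs && object_dynamic_cast(OBJECT(ms->cgs), TYPE_TDX_GUEST)) {
          kvm_type = KVM_X86_TDX_VM;
      }

      return kvm_type;
  }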
Xiaoyao Li
cfd98e2a03 i386: Introduce tdx-guest object
Introduce the tdx-guest object, which inherits CONFIDENTIAL_GUEST_SUPPORT
and will be used to create TDX VMs (TDs) by

  qemu -machine ...,confidential-guest-support=tdx0	\
       -object tdx-guest,id=tdx0

So far, it has no QAPI member/property declared, and only one internal
member, 'attributes', with fixed value 0 that is not configurable.

QAPI properties will be added later.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
Acked-by: Markus Armbruster <armbru@redhat.com>
---
Changes in v4:
 - update the new qapi `since` field from 8.2 to 9.0

Changes in v1
 - make @attributes not user-settable
2024-02-29 00:31:57 -05:00
Xiaoyao Li
01b3347bd7 *** HACK *** linux-headers: Update headers to pull in TDX API changes
Pull in recent TDX updates, which are not backwards compatible.

This is just to make this series runnable. It will be updated by the script

	scripts/update-linux-headers.sh

once TDX support is upstreamed in the Linux kernel.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2024-02-29 00:31:57 -05:00
Xiaoyao Li
b1c3368c4b kvm/memory: Make memory type private by default if it has guest memfd backend
The KVM side leaves the memory as shared by default, which may incur the
overhead of a page conversion on the first access to each page, because
the expectation is that a page is likely to be private for VMs that
require private memory (i.e., have a guest memfd).

Explicitly set the memory to private when the memory region has a valid
guest memfd backend.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2024-02-29 00:31:57 -05:00
Isaku Yamahata
018b1b64cf trace/kvm: Add trace for page conversion between shared and private
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2024-02-29 00:31:57 -05:00
Chao Peng
631c138264 kvm: handle KVM_EXIT_MEMORY_FAULT
When getting a KVM_EXIT_MEMORY_FAULT exit, it indicates that userspace
needs to do a memory conversion on the RAMBlock to turn the memory into
the desired attribute, i.e., private/shared.

Currently only KVM_MEMORY_EXIT_FLAG_PRIVATE in flags is valid when
KVM_EXIT_MEMORY_FAULT happens.

Note, KVM_EXIT_MEMORY_FAULT makes sense only when the RAMBlock has a
guest_memfd memory backend.

Note, KVM_EXIT_MEMORY_FAULT returns with -EFAULT, so special handling is
added.

When a page is converted from shared to private, the original shared
memory can be discarded via ram_block_discard_range(). Note, shared
memory can be discarded only when it's not backed by hugetlb, because
hugetlb memory is supposed to be pre-allocated and doesn't need
discarding.

Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
Co-developed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>

---
Changes in v4:
- open-coded ram_block_discard logic;
- change warn_report() to error_report(); (Daniel)
2024-02-29 00:31:57 -05:00
Xiaoyao Li
a9c371e165 physmem: Introduce ram_block_discard_guest_memfd_range()
When a memory page is converted from private to shared, the original
private memory is backed by guest_memfd. Introduce
ram_block_discard_guest_memfd_range() for discarding memory in
guest_memfd.

Originally-from: Isaku Yamahata <isaku.yamahata@intel.com>
Codeveloped-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
---
Changes in v5:
- Collect Reviewed-by from David;

Changes in in v4:
- Drop ram_block_convert_range() and open code its implementation in the
  next Patch.
2024-02-29 00:31:57 -05:00
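Since guest_memfd pages are freed by punching a hole in the backing fd, a
minimal sketch could look like this (rb->guest_memfd is the RAMBlock
member added elsewhere in this series):

  int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start,
                                          size_t length)
  {
      int ret;

      /* punch a hole in the private (guest_memfd) backing store */
      ret = fallocate(rb->guest_memfd,
                      FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                      start, length);
      if (ret) {
          ret = -errno;
          error_report("%s: fallocate failed: %s", __func__, strerror(-ret));
      }
      return ret;
  }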
Xiaoyao Li
cfef6eb7f5 kvm: Introduce support for memory_attributes
Introduce the helper functions to set the attributes of a range of
memory to private or shared.

This is necessary to notify KVM of the private/shared attribute of each
GPA range. KVM needs the information to decide whether a GPA needs to be
mapped to hva-based shared memory or guest_memfd-based private memory.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
---
Changes in v4:
- move the check of kvm_supported_memory_attributes to the common
  kvm_set_memory_attributes(); (Wang Wei)
- change warn_report() to error_report() in kvm_set_memory_attributes()
  and drop the __func__; (Daniel)
2024-02-29 00:31:57 -05:00
Chao Peng
0b4947a9c9 kvm: Enable KVM_SET_USER_MEMORY_REGION2 for memslot
Switch to KVM_SET_USER_MEMORY_REGION2 when supported by KVM.

With KVM_SET_USER_MEMORY_REGION2, QEMU can set up a memory region that is
backed by both hva-based shared memory and guest-memfd-based private
memory.

Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
Co-developed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
---
Changes in v4:
- update KVM_MEM_PRIVATE to KVM_MEM_GUEST_MEMFD; (Isaku)
2024-02-29 00:31:57 -05:00
Xiaoyao Li
6eab5a906b trace/kvm: Split address space and slot id in trace_kvm_set_user_memory()
The upper 16 bits of kvm_userspace_memory_region::slot are the
address space ID. Parse it separately in trace_kvm_set_user_memory().

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2024-02-29 00:31:57 -05:00
Xiaoyao Li
ae7f7e366d HostMem: Add mechanism to opt in kvm guest memfd via MachineState
Add a new member "guest_memfd" to memory backends. When it's set
to true, it enables RAM_GUEST_MEMFD in ram_flags, thus private kvm
guest_memfd will be allocated during RAMBlock allocation.

Memory backend's @guest_memfd is wired with @require_guest_memfd
field of MachineState. It avoid looking up the machine in phymem.c.

MachineState::require_guest_memfd is supposed to be set by any VMs
that requires KVM guest memfd as private memory, e.g., TDX VM.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
---
Changes in v4:
 - rename "require_guest_memfd" to "guest_memfd" in struct
   HostMemoryBackend;	(David Hildenbrand)
2024-02-29 00:31:57 -05:00
Xiaoyao Li
01ac1b1852 RAMBlock: Add support of KVM private guest memfd
Add KVM guest_memfd support to RAMBlock so that both normal hva-based
memory and kvm guest-memfd-based private memory can be associated in one
RAMBlock.

Introduce the new flag RAM_GUEST_MEMFD. When it's set, a KVM ioctl is
called to create a private guest_memfd during RAMBlock setup.

Allocating a new RAM_GUEST_MEMFD flag to instruct the setup of guest memfd
is more flexible and extensible than simply relying on the VM type, because
in the future we may have the case that not all the memory of a VM needs
guest memfd. As a benefit, it also avoids getting the MachineState in the
memory subsystem.

Note, RAM_GUEST_MEMFD is supposed to be set for memory backends of
confidential guests, such as TDX VM. How and when to set it for memory
backends will be implemented in the following patches.

Introduce memory_region_has_guest_memfd() to query if the MemoryRegion has
KVM guest_memfd allocated.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
---
Changes in v5:
- Use assert(new_block->guest_memfd < 0) instead of condition check;
- Collect Reviewed-by tag from David;

Changes in v4:
- Add clarification of RAM_GUEST_MEMFD in commit message; (David Hildenbrand)
- refine the return value and error message; (Daniel P. Berrangé)
- remove flags in ram_block_add(); (David Hildenbrand)

Changes in v3:
- rename gmem to guest_memfd;
- close(guest_memfd) when RAMBlock is released; (Daniel P. Berrangé)
- Squash the patch that introduces memory_region_has_guest_memfd().
2024-02-29 00:31:57 -05:00
Xiaoyao Li
bd8954a319 linux-headers: Update to Linux v6.8-rc5
Guest memfd support in QEMU requires the corresponding KVM guest memfd
APIs, which landed in Linux in v6.8-rc1.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
2024-02-29 00:31:46 -05:00
Xiaoyao Li
50f8285f8a s390: Switch to use confidential_guest_kvm_init()
Use the unified confidential_guest_kvm_init() to avoid exposing
s390-specific functions.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
---
Changes from rfc v1:
 - check machine->cgs not NULL before calling confidential_guest_kvm_init();
2024-02-29 00:28:45 -05:00
Xiaoyao Li
9b465f8e78 ppc/pef: switch to use confidential_guest_kvm_init/reset()
Use the unified interface to call the confidential-guest-related kvm_init()
and kvm_reset(), to avoid exposing PEF-specific functions.

Remove pef.h since it is now blank.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
---
Changes from rfc v1:
 - check machine->cgs is not NULL before calling
   confidential_guest_kvm_init/reset();
2024-02-29 00:28:45 -05:00
Xiaoyao Li
e4fd888eb8 i386/sev: Switch to use confidential_guest_kvm_init()
Use confidential_guest_kvm_init() instead of calling the SEV-specific
sev_kvm_init(). As a bonus, it fits future TDX support, when TDX
implements its own ConfidentialGuestSupport and .kvm_init().

Move the "TypeInfo sev_guest_info" definition and related functions to
the end of the file, to avoid declaring sev_kvm_init() ahead of time.

Delete sev-stub.c since it's not needed anymore.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
---
Changes from rfc v1:
- check ms->cgs not NULL before calling confidential_guest_kvm_init();
- delete the sev-stub.c;
2024-02-29 00:28:44 -05:00
Xiaoyao Li
57f5e8b888 confidential guest support: Add kvm_init() and kvm_reset() in class
Confidential VMs on different architectures all have the same need to do
their specific initialization (and maybe reset) work with KVM. Currently
each of them exposes an individual *_kvm_init() function and lets machine
code or kvm code call it.

To make this more object-oriented, add two virtual functions, kvm_init()
and kvm_reset(), to ConfidentialGuestSupportClass, and expose two helper
functions for invoking them.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
---
Changes since rfc v1:
- Drop the NULL check and rely on the check from the caller;
2024-02-29 00:28:44 -05:00
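One plausible shape of the kvm_init() helper (a sketch, assuming the class
method takes the ConfidentialGuestSupport object and an Error pointer;
kvm_reset() would mirror it):

  int confidential_guest_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
  {
      ConfidentialGuestSupportClass *klass =
          CONFIDENTIAL_GUEST_SUPPORT_GET_CLASS(cgs);

      if (klass->kvm_init) {
          return klass->kvm_init(cgs, errp);
      }

      return 0;
  }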
87 changed files with 4304 additions and 622 deletions

View File

@@ -91,6 +91,8 @@ bool kvm_msi_use_devid;
 static bool kvm_has_guest_debug;
 static int kvm_sstep_flags;
 static bool kvm_immediate_exit;
+static bool kvm_guest_memfd_supported;
+static uint64_t kvm_supported_memory_attributes;
 static hwaddr kvm_max_slot_size = ~0;
 
 static const KVMCapabilityInfo kvm_required_capabilites[] = {
@@ -282,34 +284,69 @@ int kvm_physical_memory_addr_from_host(KVMState *s, void *ram,
 static int kvm_set_user_memory_region(KVMMemoryListener *kml, KVMSlot *slot, bool new)
 {
     KVMState *s = kvm_state;
-    struct kvm_userspace_memory_region mem;
+    struct kvm_userspace_memory_region2 mem;
+    static int cap_user_memory2 = -1;
     int ret;
 
+    if (cap_user_memory2 == -1) {
+        cap_user_memory2 = kvm_check_extension(s, KVM_CAP_USER_MEMORY2);
+    }
+
+    if (!cap_user_memory2 && slot->guest_memfd >= 0) {
+        error_report("%s, KVM doesn't support KVM_CAP_USER_MEMORY2,"
+                     " which is required by guest memfd!", __func__);
+        exit(1);
+    }
+
     mem.slot = slot->slot | (kml->as_id << 16);
     mem.guest_phys_addr = slot->start_addr;
     mem.userspace_addr = (unsigned long)slot->ram;
     mem.flags = slot->flags;
+    mem.guest_memfd = slot->guest_memfd;
+    mem.guest_memfd_offset = slot->guest_memfd_offset;
 
     if (slot->memory_size && !new && (mem.flags ^ slot->old_flags) & KVM_MEM_READONLY) {
         /* Set the slot size to 0 before setting the slot to the desired
          * value. This is needed based on KVM commit 75d61fbc. */
         mem.memory_size = 0;
-        ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
+        if (cap_user_memory2) {
+            ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION2, &mem);
+        } else {
+            ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
+        }
         if (ret < 0) {
             goto err;
         }
     }
     mem.memory_size = slot->memory_size;
-    ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
+    if (cap_user_memory2) {
+        ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION2, &mem);
+    } else {
+        ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
+    }
     slot->old_flags = mem.flags;
 err:
-    trace_kvm_set_user_memory(mem.slot, mem.flags, mem.guest_phys_addr,
-                              mem.memory_size, mem.userspace_addr, ret);
+    trace_kvm_set_user_memory(mem.slot >> 16, (uint16_t)mem.slot, mem.flags,
+                              mem.guest_phys_addr, mem.memory_size,
+                              mem.userspace_addr, mem.guest_memfd,
+                              mem.guest_memfd_offset, ret);
     if (ret < 0) {
-        error_report("%s: KVM_SET_USER_MEMORY_REGION failed, slot=%d,"
-                     " start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s",
-                     __func__, mem.slot, slot->start_addr,
-                     (uint64_t)mem.memory_size, strerror(errno));
+        if (cap_user_memory2) {
+            error_report("%s: KVM_SET_USER_MEMORY_REGION2 failed, slot=%d,"
+                         " start=0x%" PRIx64 ", size=0x%" PRIx64 ","
+                         " flags=0x%" PRIx32 ", guest_memfd=%" PRId32 ","
+                         " guest_memfd_offset=0x%" PRIx64 ": %s",
+                         __func__, mem.slot, slot->start_addr,
+                         (uint64_t)mem.memory_size, mem.flags,
+                         mem.guest_memfd, (uint64_t)mem.guest_memfd_offset,
+                         strerror(errno));
+        } else {
+            error_report("%s: KVM_SET_USER_MEMORY_REGION failed, slot=%d,"
+                         " start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s",
+                         __func__, mem.slot, slot->start_addr,
+                         (uint64_t)mem.memory_size, strerror(errno));
+        }
     }
     return ret;
 }
@@ -381,6 +418,11 @@ static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id)
     return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id);
 }
 
+int __attribute__ ((weak)) kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp)
+{
+    return 0;
+}
+
 int kvm_init_vcpu(CPUState *cpu, Error **errp)
 {
     KVMState *s = kvm_state;
@@ -389,15 +431,27 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
     trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
 
+    /*
+     * tdx_pre_create_vcpu() may call cpu_x86_cpuid(). It in turn may call
+     * kvm_vm_ioctl(). Set cpu->kvm_state in advance to avoid NULL pointer
+     * dereference.
+     */
+    cpu->kvm_state = s;
+    ret = kvm_arch_pre_create_vcpu(cpu, errp);
+    if (ret < 0) {
+        cpu->kvm_state = NULL;
+        goto err;
+    }
+
     ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu));
     if (ret < 0) {
         error_setg_errno(errp, -ret, "kvm_init_vcpu: kvm_get_vcpu failed (%lu)",
                          kvm_arch_vcpu_id(cpu));
+        cpu->kvm_state = NULL;
         goto err;
     }
 
     cpu->kvm_fd = ret;
-    cpu->kvm_state = s;
     cpu->vcpu_dirty = true;
     cpu->dirty_pages = 0;
     cpu->throttle_us_per_full = 0;
@@ -464,6 +518,9 @@ static int kvm_mem_flags(MemoryRegion *mr)
     if (readonly && kvm_readonly_mem_allowed) {
         flags |= KVM_MEM_READONLY;
     }
+    if (memory_region_has_guest_memfd(mr)) {
+        flags |= KVM_MEM_GUEST_MEMFD;
+    }
     return flags;
 }
@@ -1265,6 +1322,46 @@ void kvm_set_max_memslot_size(hwaddr max_slot_size)
     kvm_max_slot_size = max_slot_size;
 }
 
+static int kvm_set_memory_attributes(hwaddr start, hwaddr size, uint64_t attr)
+{
+    struct kvm_memory_attributes attrs;
+    int r;
+
+    if (kvm_supported_memory_attributes == 0) {
+        error_report("No memory attribute supported by KVM\n");
+        return -EINVAL;
+    }
+
+    if ((attr & kvm_supported_memory_attributes) != attr) {
+        error_report("memory attribute 0x%lx not supported by KVM,"
+                     " supported bits are 0x%lx\n",
+                     attr, kvm_supported_memory_attributes);
+        return -EINVAL;
+    }
+
+    attrs.attributes = attr;
+    attrs.address = start;
+    attrs.size = size;
+    attrs.flags = 0;
+
+    r = kvm_vm_ioctl(kvm_state, KVM_SET_MEMORY_ATTRIBUTES, &attrs);
+    if (r) {
+        error_report("failed to set memory (0x%lx+%#zx) with attr 0x%lx error '%s'",
+                     start, size, attr, strerror(errno));
+    }
+    return r;
+}
+
+int kvm_set_memory_attributes_private(hwaddr start, hwaddr size)
+{
+    return kvm_set_memory_attributes(start, size, KVM_MEMORY_ATTRIBUTE_PRIVATE);
+}
+
+int kvm_set_memory_attributes_shared(hwaddr start, hwaddr size)
+{
+    return kvm_set_memory_attributes(start, size, 0);
+}
+
 /* Called with KVMMemoryListener.slots_lock held */
 static void kvm_set_phys_mem(KVMMemoryListener *kml,
                              MemoryRegionSection *section, bool add)
@@ -1361,6 +1458,9 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
         mem->ram_start_offset = ram_start_offset;
         mem->ram = ram;
         mem->flags = kvm_mem_flags(mr);
+        mem->guest_memfd = mr->ram_block->guest_memfd;
+        mem->guest_memfd_offset = (uint8_t*)ram - mr->ram_block->host;
+
         kvm_slot_init_dirty_bitmap(mem);
         err = kvm_set_user_memory_region(kml, mem, true);
         if (err) {
@@ -1368,6 +1468,16 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
                          strerror(-err));
             abort();
         }
+
+        if (memory_region_has_guest_memfd(mr)) {
+            err = kvm_set_memory_attributes_private(start_addr, slot_size);
+            if (err) {
+                error_report("%s: failed to set memory attribute private: %s\n",
+                             __func__, strerror(-err));
+                exit(1);
+            }
+        }
+
         start_addr += slot_size;
         ram_start_offset += slot_size;
         ram += slot_size;
@@ -2395,6 +2505,11 @@ static int kvm_init(MachineState *ms)
     }
     s->as = g_new0(struct KVMAs, s->nr_as);
 
+    kvm_guest_memfd_supported = kvm_check_extension(s, KVM_CAP_GUEST_MEMFD);
+
+    ret = kvm_check_extension(s, KVM_CAP_MEMORY_ATTRIBUTES);
+    kvm_supported_memory_attributes = ret > 0 ? ret : 0;
+
     if (object_property_find(OBJECT(current_machine), "kvm-type")) {
         g_autofree char *kvm_type = object_property_get_str(OBJECT(current_machine),
                                                             "kvm-type",
@@ -2815,6 +2930,95 @@ static void kvm_eat_signals(CPUState *cpu)
     } while (sigismember(&chkset, SIG_IPI));
 }
 
+int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private)
+{
+    MemoryRegionSection section;
+    ram_addr_t offset;
+    MemoryRegion *mr;
+    RAMBlock *rb;
+    void *addr;
+    int ret = -1;
+
+    trace_kvm_convert_memory(start, size, to_private ? "shared_to_private" : "private_to_shared");
+
+    if (!QEMU_PTR_IS_ALIGNED(start, qemu_host_page_size) ||
+        !QEMU_PTR_IS_ALIGNED(size, qemu_host_page_size)) {
+        return -1;
+    }
+
+    if (!size) {
+        return -1;
+    }
+
+    section = memory_region_find(get_system_memory(), start, size);
+    mr = section.mr;
+    if (!mr) {
+        /*
+         * Ignore converting non-assigned region to shared.
+         *
+         * TDX requires vMMIO region to be shared to inject #VE to guest.
+         * OVMF issues conservatively MapGPA(shared) on 32bit PCI MMIO region,
+         * and vIO-APIC 0xFEC00000 4K page.
+         * OVMF assigns 32bit PCI MMIO region to
+         * [top of low memory: typically 2GB=0xC000000, 0xFC00000)
+         */
+        if (!to_private) {
+            return 0;
+        }
+        return -1;
+    }
+
+    if (memory_region_has_guest_memfd(mr)) {
+        if (to_private) {
+            ret = kvm_set_memory_attributes_private(start, size);
+        } else {
+            ret = kvm_set_memory_attributes_shared(start, size);
+        }
+
+        if (ret) {
+            memory_region_unref(section.mr);
+            return ret;
+        }
+
+        addr = memory_region_get_ram_ptr(mr) + section.offset_within_region;
+        rb = qemu_ram_block_from_host(addr, false, &offset);
+
+        if (to_private) {
+            if (rb->page_size != qemu_host_page_size) {
+                /*
+                 * shared memory is back'ed by hugetlb, which is supposed to be
+                 * pre-allocated and doesn't need to be discarded
+                 */
+                return 0;
+            } else {
+                ret = ram_block_discard_range(rb, offset, size);
+            }
+        } else {
+            ret = ram_block_discard_guest_memfd_range(rb, offset, size);
+        }
+    } else {
+        /*
+         * Because vMMIO region must be shared, guest TD may convert vMMIO
+         * region to shared explicitly. Don't complain such case. See
+         * memory_region_type() for checking if the region is MMIO region.
+         */
+        if (!to_private &&
+            !memory_region_is_ram(mr) &&
+            !memory_region_is_ram_device(mr) &&
+            !memory_region_is_rom(mr) &&
+            !memory_region_is_romd(mr)) {
+            ret = 0;
+        } else {
+            error_report("Convert non guest_memfd backed memory region "
+                         "(0x%"HWADDR_PRIx" ,+ 0x%"HWADDR_PRIx") to %s",
+                         start, size, to_private ? "private" : "shared");
+        }
+    }
+
+    memory_region_unref(section.mr);
+    return ret;
+}
+
 int kvm_cpu_exec(CPUState *cpu)
 {
     struct kvm_run *run = cpu->kvm_run;
@@ -2882,18 +3086,20 @@ int kvm_cpu_exec(CPUState *cpu)
             ret = EXCP_INTERRUPT;
             break;
         }
-        fprintf(stderr, "error: kvm run failed %s\n",
-                strerror(-run_ret));
+        if (!(run_ret == -EFAULT && run->exit_reason == KVM_EXIT_MEMORY_FAULT)) {
+            fprintf(stderr, "error: kvm run failed %s\n",
+                    strerror(-run_ret));
 #ifdef TARGET_PPC
-        if (run_ret == -EBUSY) {
-            fprintf(stderr,
-                    "This is probably because your SMT is enabled.\n"
-                    "VCPU can only run on primary threads with all "
-                    "secondary threads offline.\n");
-        }
+            if (run_ret == -EBUSY) {
+                fprintf(stderr,
+                        "This is probably because your SMT is enabled.\n"
+                        "VCPU can only run on primary threads with all "
+                        "secondary threads offline.\n");
+            }
 #endif
-        ret = -1;
-        break;
+            ret = -1;
+            break;
+        }
     }
 
     trace_kvm_run_exit(cpu->cpu_index, run->exit_reason);
@@ -2976,6 +3182,16 @@ int kvm_cpu_exec(CPUState *cpu)
             break;
         }
         break;
+    case KVM_EXIT_MEMORY_FAULT:
+        if (run->memory_fault.flags & ~KVM_MEMORY_EXIT_FLAG_PRIVATE) {
+            error_report("KVM_EXIT_MEMORY_FAULT: Unknown flag 0x%" PRIx64,
+                         (uint64_t)run->memory_fault.flags);
+            ret = -1;
+            break;
+        }
+        ret = kvm_convert_memory(run->memory_fault.gpa, run->memory_fault.size,
+                                 run->memory_fault.flags & KVM_MEMORY_EXIT_FLAG_PRIVATE);
+        break;
     default:
         ret = kvm_arch_handle_exit(cpu, run);
         break;
@@ -4094,3 +4310,25 @@ void query_stats_schemas_cb(StatsSchemaList **result, Error **errp)
         query_stats_schema_vcpu(first_cpu, &stats_args);
     }
 }
+
+int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp)
+{
+    int fd;
+    struct kvm_create_guest_memfd guest_memfd = {
+        .size = size,
+        .flags = flags,
+    };
+
+    if (!kvm_guest_memfd_supported) {
+        error_setg(errp, "KVM doesn't support guest memfd\n");
+        return -1;
+    }
+
+    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_GUEST_MEMFD, &guest_memfd);
+    if (fd < 0) {
+        error_setg_errno(errp, errno, "Error creating kvm guest memfd");
+        return -1;
+    }
+
+    return fd;
+}


@@ -15,7 +15,7 @@ kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d"
kvm_irqchip_release_virq(int virq) "virq %d"
kvm_set_ioeventfd_mmio(int fd, uint64_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%" PRIx64 " val=0x%x assign: %d size: %d match: %d"
kvm_set_ioeventfd_pio(int fd, uint16_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%x val=0x%x assign: %d size: %d match: %d"
kvm_set_user_memory(uint16_t as, uint16_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, uint32_t fd, uint64_t fd_offset, int ret) "AddrSpace#%d Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " guest_memfd=%d" " guest_memfd_offset=0x%" PRIx64 " ret=%d"
kvm_clear_dirty_log(uint32_t slot, uint64_t start, uint32_t size) "slot#%"PRId32" start 0x%"PRIx64" size 0x%"PRIx32
kvm_resample_fd_notify(int gsi) "gsi %d"
kvm_dirty_ring_full(int id) "vcpu %d"
@@ -31,3 +31,4 @@ kvm_cpu_exec(void) ""
kvm_interrupt_exit_request(void) ""
kvm_io_window_exit(void) ""
kvm_run_exit_system_event(int cpu_index, uint32_t event_type) "cpu_index %d, system_event_type %"PRIu32
kvm_convert_memory(uint64_t start, uint64_t size, const char *msg) "start 0x%" PRIx64 " size 0x%" PRIx64 " %s"


@@ -129,3 +129,8 @@ bool kvm_hwpoisoned_mem(void)
{
    return false;
}

int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp)
{
    return -ENOSYS;
}


@@ -85,6 +85,7 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
    ram_flags |= fb->readonly ? RAM_READONLY_FD : 0;
    ram_flags |= fb->rom == ON_OFF_AUTO_ON ? RAM_READONLY : 0;
    ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
    ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0;
    ram_flags |= fb->is_pmem ? RAM_PMEM : 0;
    ram_flags |= RAM_NAMED_FILE;
    return memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), name,


@@ -55,6 +55,7 @@ memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
    name = host_memory_backend_get_name(backend);
    ram_flags = backend->share ? RAM_SHARED : 0;
    ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
    ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0;
    return memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), name,
                                          backend->size, ram_flags, fd, 0, errp);
}


@@ -30,6 +30,7 @@ ram_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
    name = host_memory_backend_get_name(backend);
    ram_flags = backend->share ? RAM_SHARED : 0;
    ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
    ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0;
    return memory_region_init_ram_flags_nomigrate(&backend->mr, OBJECT(backend),
                                                  name, backend->size,
                                                  ram_flags, errp);


@@ -277,6 +277,7 @@ static void host_memory_backend_init(Object *obj)
    /* TODO: convert access to globals to compat properties */
    backend->merge = machine_mem_merge(machine);
    backend->dump = machine_dump_guest_core(machine);
    backend->guest_memfd = machine_require_guest_memfd(machine);
    backend->reserve = true;
    backend->prealloc_threads = machine->smp.cpus;
}


@@ -18,6 +18,7 @@
#CONFIG_QXL=n
#CONFIG_SEV=n
#CONFIG_SGA=n
#CONFIG_TDX=n
#CONFIG_TEST_DEVICES=n
#CONFIG_TPM_CRB=n
#CONFIG_TPM_TIS_ISA=n


@@ -38,6 +38,7 @@ Supported mechanisms
Currently supported confidential guest mechanisms are:

* AMD Secure Encrypted Virtualization (SEV) (see :doc:`i386/amd-memory-encryption`)
* Intel Trust Domain Extension (TDX) (see :doc:`i386/tdx`)
* POWER Protected Execution Facility (PEF) (see :ref:`power-papr-protected-execution-facility-pef`)
* s390x Protected Virtualization (PV) (see :doc:`s390x/protvirt`)

docs/system/i386/tdx.rst (new file, 143 lines)

@@ -0,0 +1,143 @@
Intel Trust Domain Extensions (TDX)
===================================

Intel Trust Domain Extensions (TDX) refers to an Intel technology that extends
Virtual Machine Extensions (VMX) and Multi-Key Total Memory Encryption (MKTME)
with a new kind of virtual machine guest called a Trust Domain (TD). A TD runs
in a CPU mode that is designed to protect the confidentiality of its memory
contents and its CPU state from any other software, including the hosting
Virtual Machine Monitor (VMM), unless explicitly shared by the TD itself.
Prerequisites
-------------

To run a TD, the physical machine needs to have the TDX module loaded and
initialized, and the KVM hypervisor must support TDX and have it enabled. If
those requirements are met, ``KVM_CAP_VM_TYPES`` will report support for
``KVM_X86_TDX_VM``.
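
As a rough check, support can be probed from userspace; a minimal sketch,
assuming a TDX-enabled kernel's ``<linux/kvm.h>`` defines ``KVM_X86_TDX_VM``
and that the capability reports one bit per supported VM type:

.. code-block:: c

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    int main(void)
    {
        int kvm = open("/dev/kvm", O_RDWR);
        long types;

        if (kvm < 0) {
            perror("open /dev/kvm");
            return 1;
        }
        /* Bitmask of the VM types this KVM supports */
        types = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_VM_TYPES);
        printf("KVM_X86_TDX_VM %ssupported\n",
               (types > 0 && (types & (1 << KVM_X86_TDX_VM))) ? "" : "not ");
        return 0;
    }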
Trust Domain Virtual Firmware (TDVF)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Trust Domain Virtual Firmware (TDVF) is required to provide TD services to
the TD guest OS at boot. TDVF needs to be copied to guest private memory and
measured before the TD boots.

The KVM vcpu ioctl ``KVM_MEMORY_MAPPING`` can be used to populate the TDVF
content into the TD's private memory.

Since TDX doesn't support read-only memslots, TDVF cannot be mapped as a
pflash device; it actually works as RAM. The ``-bios`` option is used to
load TDVF.

OVMF is the open-source firmware that implements TDVF support. Thus the
command-line option to specify and load TDVF is ``-bios OVMF.fd``.
KVM private memory
~~~~~~~~~~~~~~~~~~

A TD's memory (RAM) needs to be convertible between private and shared, and
its BIOS (OVMF/TDVF) needs to be mapped as private as well. Thus QEMU needs
to allocate private guest memfd for them via KVM's ioctl
(``KVM_CREATE_GUEST_MEMFD``), which requires a KVM new enough to report
``KVM_CAP_GUEST_MEMFD``.
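
Under the hood this is a single VM ioctl; a minimal sketch of the call QEMU
issues (error handling elided, and ``vm_fd`` assumed to be an existing KVM VM
file descriptor):

.. code-block:: c

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Returns an fd backing 'size' bytes of guest-private memory. */
    static int create_guest_memfd(int vm_fd, uint64_t size)
    {
        struct kvm_create_guest_memfd gmem = {
            .size  = size,  /* must be page aligned */
            .flags = 0,
        };

        return ioctl(vm_fd, KVM_CREATE_GUEST_MEMFD, &gmem);
    }

The returned fd is then handed to KVM through the ``guest_memfd`` and
``guest_memfd_offset`` fields of ``struct kvm_userspace_memory_region2``
when the memslot is registered.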
Feature Control
---------------

Unlike a non-TDX VM, the CPU features (enumerated via CPUID or MSRs) of a TD
are not under full control of the VMM. The VMM can only configure a subset
of a TD's features through the ``KVM_TDX_INIT_VM`` command of the VM-scope
``MEMORY_ENCRYPT_OP`` ioctl.

The configurable features fall into three types (an encoding sketch of the
attribute bits follows this list):

- Attributes:

  - PKS (bit 30) controls whether Supervisor Protection Keys are exposed to
    the TD, which determines the related CPUID bit and CR4 bit;
  - PERFMON (bit 63) controls whether the PMU is exposed to the TD.

- XSAVE related features (XFAM):

  XFAM is a 64-bit mask, which has the same format as XCR0 or the IA32_XSS
  MSR. It determines the set of extended features available for use by the
  guest TD.

- CPUID features:

  Only some bits of some CPUID leaves are directly configurable by the VMM.
  Which features are configurable is reported via TDX capabilities.
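
The attribute bits called out above occupy fixed positions in the 64-bit
attributes word. A hypothetical encoding helper (the macro and function
names are illustrative, not part of any API):

.. code-block:: c

    #include <stdint.h>
    #include <stdbool.h>

    /* TD attribute bits described above */
    #define TDX_TD_ATTR_PKS     (1ULL << 30)  /* expose Supervisor Protection Keys */
    #define TDX_TD_ATTR_PERFMON (1ULL << 63)  /* expose the PMU to the TD */

    static uint64_t td_attributes(bool want_pks, bool want_perfmon)
    {
        uint64_t attrs = 0;

        attrs |= want_pks ? TDX_TD_ATTR_PKS : 0;
        attrs |= want_perfmon ? TDX_TD_ATTR_PERFMON : 0;
        return attrs;
    }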
TDX capabilities
~~~~~~~~~~~~~~~~

The VM-scope ``MEMORY_ENCRYPT_OP`` ioctl provides the command
``KVM_TDX_CAPABILITIES`` to get the TDX capabilities from KVM. It returns a
``struct kvm_tdx_capabilities``, which tells the supported configuration of
attributes, XFAM and CPUIDs.
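
A hedged sketch of the query (assuming the ``struct kvm_tdx_cmd`` layout
proposed in the corresponding KVM series, with the command issued on the VM
file descriptor):

.. code-block:: c

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int tdx_get_capabilities(int vm_fd, struct kvm_tdx_capabilities *caps)
    {
        struct kvm_tdx_cmd cmd = {
            .id   = KVM_TDX_CAPABILITIES,
            .data = (uintptr_t)caps,  /* out: filled in by KVM */
        };

        return ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
    }

``struct kvm_tdx_capabilities`` ends in a flexible array of CPUID configs,
so the caller must allocate it large enough for the entries it requests.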
TD attestation
--------------

In a TD guest, the attestation process is used to verify the TD's
trustworthiness to other entities before provisioning secrets to the guest.

TD attestation is initiated by first calling TDG.MR.REPORT inside the TD to
get the REPORT. The REPORT data then needs to be converted into a remotely
verifiable Quote by the SGX Quoting Enclave (QE).

A host daemon, the Quote Generation Service (QGS), provides the
functionality of the SGX QE. It listens on a socket address, which can be
given to QEMU via the "quote-generation-socket" property. On a GetQuote
request from the TD guest, QEMU sends the TDREPORT to QGS over the
"quote-generation-socket" socket, gets the resulting Quote, and returns it
to the TD guest.

Though "quote-generation-socket" is optional for booting the TD guest, it's
required to support TD guest attestation.
Launching a TD (TDX VM)
-----------------------

To launch a TDX guest, the following new options are required:

.. parsed-literal::

    |qemu_system_x86| \\
        -object tdx-guest,id=tdx0 \\
        -machine ...,kernel-irqchip=split,confidential-guest-support=tdx0 \\
        -bios OVMF.fd \\

If TD attestation support is wanted:

.. parsed-literal::

    |qemu_system_x86| \\
        -object '{"qom-type":"tdx-guest","id":"tdx0","quote-generation-socket":{"type": "vsock", "cid":"1","port":"1234"}}' \\
        -machine ...,kernel-irqchip=split,confidential-guest-support=tdx0 \\
        -bios OVMF.fd \\
Debugging
---------

Bit 0 of the TD attributes is the DEBUG bit, which decides whether the TD
runs in off-TD debug mode. When in off-TD debug mode, a TD's VCPU state and
private memory are accessible via certain SEAMCALLs. This requires KVM to
expose APIs to invoke those SEAMCALLs and corresponding QEMU changes.

It's targeted as future work.
Restrictions
------------

- kernel-irqchip must be split;

- No read-only support for private memory;

- No SMM support: SMM support requires manipulating the guest register
  state, which is not allowed.
Live Migration
--------------

TODO

References
----------

- `TDX Homepage <https://www.intel.com/content/www/us/en/developer/articles/technical/intel-trust-domain-extensions.html>`__

- `SGX QE <https://github.com/intel/SGXDataCenterAttestationPrimitives/tree/master/QuoteGeneration>`__


@@ -29,6 +29,7 @@ Architectural features
   i386/kvm-pv
   i386/sgx
   i386/amd-memory-encryption
   i386/tdx

OS requirements
~~~~~~~~~~~~~~~


@@ -1192,6 +1192,11 @@ bool machine_mem_merge(MachineState *machine)
    return machine->mem_merge;
}

bool machine_require_guest_memfd(MachineState *machine)
{
    return machine->require_guest_memfd;
}

static char *cpu_slot_to_string(const CPUArchId *cpu)
{
    GString *s = g_string_new(NULL);


@@ -10,6 +10,11 @@ config SGX
    bool
    depends on KVM

config TDX
    bool
    select X86_FW_OVMF
    depends on KVM

config PC
    bool
    imply APPLESMC
@@ -26,6 +31,7 @@ config PC
    imply QXL
    imply SEV
    imply SGX
    imply TDX
    imply TEST_DEVICES
    imply TPM_CRB
    imply TPM_TIS_ISA


@@ -964,7 +964,8 @@ static void build_dbg_aml(Aml *table)
    aml_append(table, scope);
}

static Aml *build_link_dev(const char *name, uint8_t uid, Aml *reg,
                           bool level_trigger_unsupported)
{
    Aml *dev;
    Aml *crs;
@@ -976,7 +977,10 @@ static Aml *build_link_dev(const char *name, uint8_t uid, Aml *reg)
    aml_append(dev, aml_name_decl("_UID", aml_int(uid)));

    crs = aml_resource_template();
    aml_append(crs, aml_interrupt(AML_CONSUMER,
                                  level_trigger_unsupported ?
                                  AML_EDGE : AML_LEVEL,
                                  AML_ACTIVE_HIGH,
                                  AML_SHARED, irqs, ARRAY_SIZE(irqs)));
    aml_append(dev, aml_name_decl("_PRS", crs));
@@ -1000,7 +1004,8 @@ static Aml *build_link_dev(const char *name, uint8_t uid, Aml *reg)
    return dev;
}

static Aml *build_gsi_link_dev(const char *name, uint8_t uid,
                               uint8_t gsi, bool level_trigger_unsupported)
{
    Aml *dev;
    Aml *crs;
@@ -1013,7 +1018,10 @@ static Aml *build_gsi_link_dev(const char *name, uint8_t uid, uint8_t gsi)
    crs = aml_resource_template();
    irqs = gsi;
    aml_append(crs, aml_interrupt(AML_CONSUMER,
                                  level_trigger_unsupported ?
                                  AML_EDGE : AML_LEVEL,
                                  AML_ACTIVE_HIGH,
                                  AML_SHARED, &irqs, 1));
    aml_append(dev, aml_name_decl("_PRS", crs));
@@ -1032,7 +1040,7 @@ static Aml *build_gsi_link_dev(const char *name, uint8_t uid, uint8_t gsi)
}

/* _CRS method - get current settings */
static Aml *build_iqcr_method(bool is_piix4, bool level_trigger_unsupported)
{
    Aml *if_ctx;
    uint32_t irqs;
@@ -1040,7 +1048,9 @@ static Aml *build_iqcr_method(bool is_piix4)
    Aml *crs = aml_resource_template();

    irqs = 0;
    aml_append(crs, aml_interrupt(AML_CONSUMER,
                                  level_trigger_unsupported ?
                                  AML_EDGE : AML_LEVEL,
                                  AML_ACTIVE_HIGH, AML_SHARED, &irqs, 1));
    aml_append(method, aml_name_decl("PRR0", crs));
@@ -1074,7 +1084,7 @@ static Aml *build_irq_status_method(void)
    return method;
}

static void build_piix4_pci0_int(Aml *table, bool level_trigger_unsupported)
{
    Aml *dev;
    Aml *crs;
@@ -1087,12 +1097,16 @@ static void build_piix4_pci0_int(Aml *table)
    aml_append(sb_scope, pci0_scope);

    aml_append(sb_scope, build_irq_status_method());
    aml_append(sb_scope, build_iqcr_method(true, level_trigger_unsupported));

    aml_append(sb_scope, build_link_dev("LNKA", 0, aml_name("PRQ0"),
                                        level_trigger_unsupported));
    aml_append(sb_scope, build_link_dev("LNKB", 1, aml_name("PRQ1"),
                                        level_trigger_unsupported));
    aml_append(sb_scope, build_link_dev("LNKC", 2, aml_name("PRQ2"),
                                        level_trigger_unsupported));
    aml_append(sb_scope, build_link_dev("LNKD", 3, aml_name("PRQ3"),
                                        level_trigger_unsupported));

    dev = aml_device("LNKS");
    {
@@ -1101,7 +1115,9 @@ static void build_piix4_pci0_int(Aml *table)
        crs = aml_resource_template();
        irqs = 9;
        aml_append(crs, aml_interrupt(AML_CONSUMER,
                                      level_trigger_unsupported ?
                                      AML_EDGE : AML_LEVEL,
                                      AML_ACTIVE_HIGH, AML_SHARED,
                                      &irqs, 1));
        aml_append(dev, aml_name_decl("_PRS", crs));
@@ -1187,7 +1203,7 @@ static Aml *build_q35_routing_table(const char *str)
    return pkg;
}

static void build_q35_pci0_int(Aml *table, bool level_trigger_unsupported)
{
    Aml *method;
    Aml *sb_scope = aml_scope("_SB");
@@ -1226,25 +1242,41 @@ static void build_q35_pci0_int(Aml *table)
    aml_append(sb_scope, pci0_scope);

    aml_append(sb_scope, build_irq_status_method());
    aml_append(sb_scope, build_iqcr_method(false, level_trigger_unsupported));

    aml_append(sb_scope, build_link_dev("LNKA", 0, aml_name("PRQA"),
                                        level_trigger_unsupported));
    aml_append(sb_scope, build_link_dev("LNKB", 1, aml_name("PRQB"),
                                        level_trigger_unsupported));
    aml_append(sb_scope, build_link_dev("LNKC", 2, aml_name("PRQC"),
                                        level_trigger_unsupported));
    aml_append(sb_scope, build_link_dev("LNKD", 3, aml_name("PRQD"),
                                        level_trigger_unsupported));
    aml_append(sb_scope, build_link_dev("LNKE", 4, aml_name("PRQE"),
                                        level_trigger_unsupported));
    aml_append(sb_scope, build_link_dev("LNKF", 5, aml_name("PRQF"),
                                        level_trigger_unsupported));
    aml_append(sb_scope, build_link_dev("LNKG", 6, aml_name("PRQG"),
                                        level_trigger_unsupported));
    aml_append(sb_scope, build_link_dev("LNKH", 7, aml_name("PRQH"),
                                        level_trigger_unsupported));

    aml_append(sb_scope, build_gsi_link_dev("GSIA", 0x10, 0x10,
                                            level_trigger_unsupported));
    aml_append(sb_scope, build_gsi_link_dev("GSIB", 0x11, 0x11,
                                            level_trigger_unsupported));
    aml_append(sb_scope, build_gsi_link_dev("GSIC", 0x12, 0x12,
                                            level_trigger_unsupported));
    aml_append(sb_scope, build_gsi_link_dev("GSID", 0x13, 0x13,
                                            level_trigger_unsupported));
    aml_append(sb_scope, build_gsi_link_dev("GSIE", 0x14, 0x14,
                                            level_trigger_unsupported));
    aml_append(sb_scope, build_gsi_link_dev("GSIF", 0x15, 0x15,
                                            level_trigger_unsupported));
    aml_append(sb_scope, build_gsi_link_dev("GSIG", 0x16, 0x16,
                                            level_trigger_unsupported));
    aml_append(sb_scope, build_gsi_link_dev("GSIH", 0x17, 0x17,
                                            level_trigger_unsupported));

    aml_append(table, sb_scope);
}
@@ -1426,6 +1458,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
    PCMachineState *pcms = PC_MACHINE(machine);
    PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(machine);
    X86MachineState *x86ms = X86_MACHINE(machine);
    bool level_trigger_unsupported = x86ms->eoi_intercept_unsupported;
    AcpiMcfgInfo mcfg;
    bool mcfg_valid = !!acpi_get_mcfg(&mcfg);
    uint32_t nr_mem = machine->ram_slots;
@@ -1458,7 +1491,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
        if (pm->pcihp_bridge_en || pm->pcihp_root_en) {
            build_x86_acpi_pci_hotplug(dsdt, pm->pcihp_io_base);
        }
        build_piix4_pci0_int(dsdt, level_trigger_unsupported);
    } else if (q35) {
        sb_scope = aml_scope("_SB");
        dev = aml_device("PCI0");
@@ -1502,7 +1535,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
        if (pm->pcihp_bridge_en) {
            build_x86_acpi_pci_hotplug(dsdt, pm->pcihp_io_base);
        }
        build_q35_pci0_int(dsdt, level_trigger_unsupported);
    }

    if (misc->has_hpet) {


@@ -103,6 +103,7 @@ void acpi_build_madt(GArray *table_data, BIOSLinker *linker,
    const CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(MACHINE(x86ms));
    AcpiTable table = { .sig = "APIC", .rev = 3, .oem_id = oem_id,
                        .oem_table_id = oem_table_id };
    bool level_trigger_unsupported = x86ms->eoi_intercept_unsupported;

    acpi_table_begin(&table, table_data);
    /* Local APIC Address */
@@ -122,18 +123,42 @@ void acpi_build_madt(GArray *table_data, BIOSLinker *linker,
                        IO_APIC_SECONDARY_ADDRESS, IO_APIC_SECONDARY_IRQBASE);
    }

    if (level_trigger_unsupported) {
        /* Force edge trigger */
        if (x86mc->apic_xrupt_override) {
            build_xrupt_override(table_data, 0, 2,
                                 /* Flags: active high, edge triggered */
                                 1 | (1 << 2));
        }

        for (i = x86mc->apic_xrupt_override ? 1 : 0; i < 16; i++) {
            build_xrupt_override(table_data, i, i,
                                 /* Flags: active high, edge triggered */
                                 1 | (1 << 2));
        }

        if (x86ms->ioapic2) {
            for (i = 0; i < 16; i++) {
                build_xrupt_override(table_data, IO_APIC_SECONDARY_IRQBASE + i,
                                     IO_APIC_SECONDARY_IRQBASE + i,
                                     /* Flags: active high, edge triggered */
                                     1 | (1 << 2));
            }
        }
    } else {
        if (x86mc->apic_xrupt_override) {
            build_xrupt_override(table_data, 0, 2,
                0 /* Flags: Conforms to the specifications of the bus */);
        }

        for (i = 1; i < 16; i++) {
            if (!(x86ms->pci_irq_mask & (1 << i))) {
                /* No need for an INT source override structure. */
                continue;
            }
            build_xrupt_override(table_data, i, i,
                0xd /* Flags: Active high, Level Triggered */);
        }
    }

    if (x2apic_mode) {


@@ -28,6 +28,7 @@ i386_ss.add(when: 'CONFIG_PC', if_true: files(
  'port92.c'))
i386_ss.add(when: 'CONFIG_X86_FW_OVMF', if_true: files('pc_sysfw_ovmf.c'),
                                        if_false: files('pc_sysfw_ovmf-stubs.c'))
i386_ss.add(when: 'CONFIG_TDX', if_true: files('tdvf.c', 'tdvf-hob.c'))

subdir('kvm')
subdir('xen')


@@ -43,6 +43,7 @@
#include "sysemu/xen.h"
#include "sysemu/reset.h"
#include "kvm/kvm_i386.h"
#include "kvm/tdx.h"
#include "hw/xen/xen.h"
#include "qapi/qmp/qlist.h"
#include "qemu/error-report.h"
@@ -1028,16 +1029,18 @@ void pc_memory_init(PCMachineState *pcms,
    /* Initialize PC system firmware */
    pc_system_firmware_init(pcms, rom_memory);

    if (!is_tdx_vm()) {
        option_rom_mr = g_malloc(sizeof(*option_rom_mr));
        memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
                               &error_fatal);
        if (pcmc->pci_enabled) {
            memory_region_set_readonly(option_rom_mr, true);
        }
        memory_region_add_subregion_overlap(rom_memory,
                                            PC_ROM_MIN_VGA,
                                            option_rom_mr,
                                            1);
    }

    fw_cfg = fw_cfg_arch_create(machine,
                                x86ms->boot_cpus, x86ms->apic_id_limit);


@@ -212,6 +212,8 @@ static void pc_q35_init(MachineState *machine)
                             x86ms->above_4g_mem_size, NULL);
    object_property_set_bool(phb, PCI_HOST_BYPASS_IOMMU,
                             pcms->default_bus_bypass_iommu, NULL);
    object_property_set_bool(phb, PCI_HOST_PROP_SMM_RANGES,
                             x86_machine_is_smm_enabled(x86ms), NULL);
    sysbus_realize_and_unref(SYS_BUS_DEVICE(phb), &error_fatal);

    /* pci */


@@ -37,6 +37,7 @@
#include "hw/block/flash.h"
#include "sysemu/kvm.h"
#include "sev.h"
#include "kvm/tdx.h"

#define FLASH_SECTOR_SIZE 4096
@@ -254,5 +255,11 @@ void x86_firmware_configure(void *ptr, int size)
        }

        sev_encrypt_flash(ptr, size, &error_fatal);
    } else if (is_tdx_vm()) {
        ret = tdx_parse_tdvf(ptr, size);
        if (ret) {
            error_report("failed to parse TDVF for TDX VM");
            exit(1);
        }
    }
}

hw/i386/tdvf-hob.c (new file, 147 lines)

@@ -0,0 +1,147 @@
/*
* SPDX-License-Identifier: GPL-2.0-or-later
* Copyright (c) 2020 Intel Corporation
* Author: Isaku Yamahata <isaku.yamahata at gmail.com>
* <isaku.yamahata at intel.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License along
* with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include "qemu/log.h"
#include "qemu/error-report.h"
#include "e820_memory_layout.h"
#include "hw/i386/pc.h"
#include "hw/i386/x86.h"
#include "hw/pci/pcie_host.h"
#include "sysemu/kvm.h"
#include "standard-headers/uefi/uefi.h"
#include "tdvf-hob.h"
typedef struct TdvfHob {
hwaddr hob_addr;
void *ptr;
int size;
/* working area */
void *current;
void *end;
} TdvfHob;
static uint64_t tdvf_current_guest_addr(const TdvfHob *hob)
{
return hob->hob_addr + (hob->current - hob->ptr);
}
static void tdvf_align(TdvfHob *hob, size_t align)
{
hob->current = QEMU_ALIGN_PTR_UP(hob->current, align);
}
static void *tdvf_get_area(TdvfHob *hob, uint64_t size)
{
void *ret;
if (hob->current + size > hob->end) {
error_report("TD_HOB overrun, size = 0x%" PRIx64, size);
exit(1);
}
ret = hob->current;
hob->current += size;
tdvf_align(hob, 8);
return ret;
}
static void tdvf_hob_add_memory_resources(TdxGuest *tdx, TdvfHob *hob)
{
EFI_HOB_RESOURCE_DESCRIPTOR *region;
EFI_RESOURCE_ATTRIBUTE_TYPE attr;
EFI_RESOURCE_TYPE resource_type;
TdxRamEntry *e;
int i;
for (i = 0; i < tdx->nr_ram_entries; i++) {
e = &tdx->ram_entries[i];
if (e->type == TDX_RAM_UNACCEPTED) {
resource_type = EFI_RESOURCE_MEMORY_UNACCEPTED;
attr = EFI_RESOURCE_ATTRIBUTE_TDVF_UNACCEPTED;
} else if (e->type == TDX_RAM_ADDED){
resource_type = EFI_RESOURCE_SYSTEM_MEMORY;
attr = EFI_RESOURCE_ATTRIBUTE_TDVF_PRIVATE;
} else {
error_report("unknown TDX_RAM_ENTRY type %d", e->type);
exit(1);
}
region = tdvf_get_area(hob, sizeof(*region));
*region = (EFI_HOB_RESOURCE_DESCRIPTOR) {
.Header = {
.HobType = EFI_HOB_TYPE_RESOURCE_DESCRIPTOR,
.HobLength = cpu_to_le16(sizeof(*region)),
.Reserved = cpu_to_le32(0),
},
.Owner = EFI_HOB_OWNER_ZERO,
.ResourceType = cpu_to_le32(resource_type),
.ResourceAttribute = cpu_to_le32(attr),
.PhysicalStart = cpu_to_le64(e->address),
.ResourceLength = cpu_to_le64(e->length),
};
}
}
void tdvf_hob_create(TdxGuest *tdx, TdxFirmwareEntry *td_hob)
{
TdvfHob hob = {
.hob_addr = td_hob->address,
.size = td_hob->size,
.ptr = td_hob->mem_ptr,
.current = td_hob->mem_ptr,
.end = td_hob->mem_ptr + td_hob->size,
};
EFI_HOB_GENERIC_HEADER *last_hob;
EFI_HOB_HANDOFF_INFO_TABLE *hit;
/* Note, Efi{Free}Memory{Bottom,Top} are ignored, leave 'em zeroed. */
hit = tdvf_get_area(&hob, sizeof(*hit));
*hit = (EFI_HOB_HANDOFF_INFO_TABLE) {
.Header = {
.HobType = EFI_HOB_TYPE_HANDOFF,
.HobLength = cpu_to_le16(sizeof(*hit)),
.Reserved = cpu_to_le32(0),
},
.Version = cpu_to_le32(EFI_HOB_HANDOFF_TABLE_VERSION),
.BootMode = cpu_to_le32(0),
.EfiMemoryTop = cpu_to_le64(0),
.EfiMemoryBottom = cpu_to_le64(0),
.EfiFreeMemoryTop = cpu_to_le64(0),
.EfiFreeMemoryBottom = cpu_to_le64(0),
.EfiEndOfHobList = cpu_to_le64(0), /* initialized later */
};
tdvf_hob_add_memory_resources(tdx, &hob);
last_hob = tdvf_get_area(&hob, sizeof(*last_hob));
*last_hob = (EFI_HOB_GENERIC_HEADER) {
.HobType = EFI_HOB_TYPE_END_OF_HOB_LIST,
.HobLength = cpu_to_le16(sizeof(*last_hob)),
.Reserved = cpu_to_le32(0),
};
hit->EfiEndOfHobList = tdvf_current_guest_addr(&hob);
}

hw/i386/tdvf-hob.h (new file, 24 lines)

@@ -0,0 +1,24 @@
#ifndef HW_I386_TD_HOB_H
#define HW_I386_TD_HOB_H

#include "hw/i386/tdvf.h"
#include "target/i386/kvm/tdx.h"

void tdvf_hob_create(TdxGuest *tdx, TdxFirmwareEntry *td_hob);

#define EFI_RESOURCE_ATTRIBUTE_TDVF_PRIVATE \
    (EFI_RESOURCE_ATTRIBUTE_PRESENT |       \
     EFI_RESOURCE_ATTRIBUTE_INITIALIZED |   \
     EFI_RESOURCE_ATTRIBUTE_TESTED)

#define EFI_RESOURCE_ATTRIBUTE_TDVF_UNACCEPTED \
    (EFI_RESOURCE_ATTRIBUTE_PRESENT |          \
     EFI_RESOURCE_ATTRIBUTE_INITIALIZED |      \
     EFI_RESOURCE_ATTRIBUTE_TESTED)

#define EFI_RESOURCE_ATTRIBUTE_TDVF_MMIO \
    (EFI_RESOURCE_ATTRIBUTE_PRESENT |    \
     EFI_RESOURCE_ATTRIBUTE_INITIALIZED |\
     EFI_RESOURCE_ATTRIBUTE_UNCACHEABLE)

#endif

hw/i386/tdvf.c (new file, 200 lines)

@@ -0,0 +1,200 @@
/*
* SPDX-License-Identifier: GPL-2.0-or-later
* Copyright (c) 2020 Intel Corporation
* Author: Isaku Yamahata <isaku.yamahata at gmail.com>
* <isaku.yamahata at intel.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License along
* with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include "qemu/error-report.h"
#include "hw/i386/pc.h"
#include "hw/i386/tdvf.h"
#include "sysemu/kvm.h"
#define TDX_METADATA_OFFSET_GUID "e47a6535-984a-4798-865e-4685a7bf8ec2"
#define TDX_METADATA_VERSION 1
#define TDVF_SIGNATURE 0x46564454 /* TDVF as little endian */
typedef struct {
uint32_t DataOffset;
uint32_t RawDataSize;
uint64_t MemoryAddress;
uint64_t MemoryDataSize;
uint32_t Type;
uint32_t Attributes;
} TdvfSectionEntry;
typedef struct {
uint32_t Signature;
uint32_t Length;
uint32_t Version;
uint32_t NumberOfSectionEntries;
TdvfSectionEntry SectionEntries[];
} TdvfMetadata;
struct tdx_metadata_offset {
uint32_t offset;
};
static TdvfMetadata *tdvf_get_metadata(void *flash_ptr, int size)
{
TdvfMetadata *metadata;
uint32_t offset = 0;
uint8_t *data;
if ((uint32_t) size != size) {
return NULL;
}
if (pc_system_ovmf_table_find(TDX_METADATA_OFFSET_GUID, &data, NULL)) {
offset = size - le32_to_cpu(((struct tdx_metadata_offset *)data)->offset);
if (offset + sizeof(*metadata) > size) {
return NULL;
}
} else {
error_report("Cannot find TDX_METADATA_OFFSET_GUID");
return NULL;
}
metadata = flash_ptr + offset;
/* Finally, verify the signature to determine if this is a TDVF image. */
metadata->Signature = le32_to_cpu(metadata->Signature);
if (metadata->Signature != TDVF_SIGNATURE) {
error_report("Invalid TDVF signature in metadata!");
return NULL;
}
/* Sanity check that the TDVF doesn't overlap its own metadata. */
metadata->Length = le32_to_cpu(metadata->Length);
if (offset + metadata->Length > size) {
return NULL;
}
/* Only version 1 is supported/defined. */
metadata->Version = le32_to_cpu(metadata->Version);
if (metadata->Version != TDX_METADATA_VERSION) {
return NULL;
}
return metadata;
}
static int tdvf_parse_and_check_section_entry(const TdvfSectionEntry *src,
TdxFirmwareEntry *entry)
{
entry->data_offset = le32_to_cpu(src->DataOffset);
entry->data_len = le32_to_cpu(src->RawDataSize);
entry->address = le64_to_cpu(src->MemoryAddress);
entry->size = le64_to_cpu(src->MemoryDataSize);
entry->type = le32_to_cpu(src->Type);
entry->attributes = le32_to_cpu(src->Attributes);
/* sanity check */
if (entry->size < entry->data_len) {
error_report("Broken metadata RawDataSize 0x%x MemoryDataSize 0x%lx",
entry->data_len, entry->size);
return -1;
}
if (!QEMU_IS_ALIGNED(entry->address, TARGET_PAGE_SIZE)) {
error_report("MemoryAddress 0x%lx not page aligned", entry->address);
return -1;
}
if (!QEMU_IS_ALIGNED(entry->size, TARGET_PAGE_SIZE)) {
error_report("MemoryDataSize 0x%lx not page aligned", entry->size);
return -1;
}
switch (entry->type) {
case TDVF_SECTION_TYPE_BFV:
case TDVF_SECTION_TYPE_CFV:
/* The sections that must be copied from firmware image to TD memory */
if (entry->data_len == 0) {
error_report("%d section with RawDataSize == 0", entry->type);
return -1;
}
break;
case TDVF_SECTION_TYPE_TD_HOB:
case TDVF_SECTION_TYPE_TEMP_MEM:
/* The sections that no need to be copied from firmware image */
if (entry->data_len != 0) {
error_report("%d section with RawDataSize 0x%x != 0",
entry->type, entry->data_len);
return -1;
}
break;
default:
error_report("TDVF contains unsupported section type %d", entry->type);
return -1;
}
return 0;
}
int tdvf_parse_metadata(TdxFirmware *fw, void *flash_ptr, int size)
{
TdvfSectionEntry *sections;
TdvfMetadata *metadata;
ssize_t entries_size;
uint32_t len, i;
metadata = tdvf_get_metadata(flash_ptr, size);
if (!metadata) {
return -EINVAL;
}
//load and parse metadata entries
fw->nr_entries = le32_to_cpu(metadata->NumberOfSectionEntries);
if (fw->nr_entries < 2) {
error_report("Invalid number of fw entries (%u) in TDVF", fw->nr_entries);
return -EINVAL;
}
len = le32_to_cpu(metadata->Length);
entries_size = fw->nr_entries * sizeof(TdvfSectionEntry);
if (len != sizeof(*metadata) + entries_size) {
error_report("TDVF metadata len (0x%x) mismatch, expected (0x%x)",
len, (uint32_t)(sizeof(*metadata) + entries_size));
return -EINVAL;
}
fw->entries = g_new(TdxFirmwareEntry, fw->nr_entries);
sections = g_new(TdvfSectionEntry, fw->nr_entries);
if (!memcpy(sections, (void *)metadata + sizeof(*metadata), entries_size)) {
error_report("Failed to read TDVF section entries");
goto err;
}
for (i = 0; i < fw->nr_entries; i++) {
if (tdvf_parse_and_check_section_entry(&sections[i], &fw->entries[i])) {
goto err;
}
}
g_free(sections);
fw->mem_ptr = flash_ptr;
return 0;
err:
g_free(sections);
fw->entries = 0;
g_free(fw->entries);
return -EINVAL;
}


@@ -47,6 +47,7 @@
#include "hw/intc/i8259.h"
#include "hw/rtc/mc146818rtc.h"
#include "target/i386/sev.h"
#include "kvm/tdx.h"

#include "hw/acpi/cpu_hotplug.h"
#include "hw/irq.h"
@@ -1157,9 +1158,17 @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware,
        (bios_size % 65536) != 0) {
        goto bios_error;
    }
    bios = g_malloc(sizeof(*bios));
    if (is_tdx_vm()) {
        memory_region_init_ram_guest_memfd(bios, NULL, "pc.bios", bios_size,
                                           &error_fatal);
        tdx_set_tdvf_region(bios);
    } else {
        memory_region_init_ram(bios, NULL, "pc.bios", bios_size, &error_fatal);
    }
    if (sev_enabled() || is_tdx_vm()) {
        /*
         * The concept of a "reset" simply doesn't exist for
         * confidential computing guests, we have to destroy and
@@ -1181,17 +1190,20 @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware,
    }
    g_free(filename);

    /* For TDX, aliasing different GPAs to the same private memory is not supported */
    if (!is_tdx_vm()) {
        /* map the last 128KB of the BIOS in ISA space */
        isa_bios_size = MIN(bios_size, 128 * KiB);
        isa_bios = g_malloc(sizeof(*isa_bios));
        memory_region_init_alias(isa_bios, NULL, "isa-bios", bios,
                                 bios_size - isa_bios_size, isa_bios_size);
        memory_region_add_subregion_overlap(rom_memory,
                                            0x100000 - isa_bios_size,
                                            isa_bios,
                                            1);
        if (!isapc_ram_fw) {
            memory_region_set_readonly(isa_bios, true);
        }
    }

    /* map all the bios at the top of memory */
@@ -1389,6 +1401,17 @@ static void machine_set_sgx_epc(Object *obj, Visitor *v, const char *name,
    qapi_free_SgxEPCList(list);
}

static int x86_kvm_type(MachineState *ms, const char *vm_type)
{
    X86MachineState *x86ms = X86_MACHINE(ms);
    int kvm_type;

    kvm_type = kvm_enabled() ? kvm_get_vm_type(ms, vm_type) : 0;
    x86ms->vm_type = kvm_type;

    return kvm_type;
}

static void x86_machine_initfn(Object *obj)
{
    X86MachineState *x86ms = X86_MACHINE(obj);
@@ -1402,6 +1425,7 @@ static void x86_machine_initfn(Object *obj)
    x86ms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8);
    x86ms->bus_lock_ratelimit = 0;
    x86ms->above_4g_mem_start = 4 * GiB;
    x86ms->eoi_intercept_unsupported = false;
}

static void x86_machine_class_init(ObjectClass *oc, void *data)
@@ -1413,6 +1437,7 @@ static void x86_machine_class_init(ObjectClass *oc, void *data)
    mc->cpu_index_to_instance_props = x86_cpu_index_to_props;
    mc->get_default_cpu_node_id = x86_get_default_cpu_node_id;
    mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids;
    mc->kvm_type = x86_kvm_type;
    x86mc->save_tsc_khz = true;
    x86mc->fwcfg_dma_enabled = true;
    nc->nmi_monitor_handler = x86_nmi;


@@ -179,6 +179,8 @@ static Property q35_host_props[] = {
                     mch.below_4g_mem_size, 0),
    DEFINE_PROP_SIZE(PCI_HOST_ABOVE_4G_MEM_SIZE, Q35PCIHost,
                     mch.above_4g_mem_size, 0),
    DEFINE_PROP_BOOL(PCI_HOST_PROP_SMM_RANGES, Q35PCIHost,
                     mch.has_smm_ranges, true),
    DEFINE_PROP_BOOL("x-pci-hole64-fix", Q35PCIHost, pci_hole64_fix, true),
    DEFINE_PROP_END_OF_LIST(),
};
@@ -214,6 +216,7 @@ static void q35_host_initfn(Object *obj)
    /* mch's object_initialize resets the default value, set it again */
    qdev_prop_set_uint64(DEVICE(s), PCI_HOST_PROP_PCI_HOLE64_SIZE,
                         Q35_PCI_HOST_HOLE64_SIZE_DEFAULT);

    object_property_add(obj, PCI_HOST_PROP_PCI_HOLE_START, "uint32",
                        q35_host_get_pci_hole_start,
                        NULL, NULL, NULL);
@@ -476,6 +479,10 @@ static void mch_write_config(PCIDevice *d,
        mch_update_pciexbar(mch);
    }

    if (!mch->has_smm_ranges) {
        return;
    }

    if (ranges_overlap(address, len, MCH_HOST_BRIDGE_SMRAM,
                       MCH_HOST_BRIDGE_SMRAM_SIZE)) {
        mch_update_smram(mch);
@@ -494,10 +501,13 @@ static void mch_write_config(PCIDevice *d,
static void mch_update(MCHPCIState *mch)
{
    mch_update_pciexbar(mch);
    mch_update_pam(mch);
    if (mch->has_smm_ranges) {
        mch_update_smram(mch);
        mch_update_ext_tseg_mbytes(mch);
        mch_update_smbase_smram(mch);
    }

    /*
     * pci hole goes from end-of-low-ram to io-apic.
@@ -538,19 +548,21 @@ static void mch_reset(DeviceState *qdev)
    pci_set_quad(d->config + MCH_HOST_BRIDGE_PCIEXBAR,
                 MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT);

    if (mch->has_smm_ranges) {
        d->config[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_DEFAULT;
        d->config[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_DEFAULT;
        d->wmask[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_WMASK;
        d->wmask[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_WMASK;

        if (mch->ext_tseg_mbytes > 0) {
            pci_set_word(d->config + MCH_HOST_BRIDGE_EXT_TSEG_MBYTES,
                         MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY);
        }

        d->config[MCH_HOST_BRIDGE_F_SMBASE] = 0;
        d->wmask[MCH_HOST_BRIDGE_F_SMBASE] = 0xff;
    }

    mch_update(mch);
}
@@ -568,6 +580,20 @@ static void mch_realize(PCIDevice *d, Error **errp)
    /* setup pci memory mapping */
    pc_pci_as_mapping_init(mch->system_memory, mch->pci_address_space);

    /* PAM */
    init_pam(&mch->pam_regions[0], OBJECT(mch), mch->ram_memory,
             mch->system_memory, mch->pci_address_space,
             PAM_BIOS_BASE, PAM_BIOS_SIZE);
    for (i = 0; i < ARRAY_SIZE(mch->pam_regions) - 1; ++i) {
        init_pam(&mch->pam_regions[i + 1], OBJECT(mch), mch->ram_memory,
                 mch->system_memory, mch->pci_address_space,
                 PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE);
    }

    if (!mch->has_smm_ranges) {
        return;
    }

    /* if *disabled* show SMRAM to all CPUs */
    memory_region_init_alias(&mch->smram_region, OBJECT(mch), "smram-region",
                             mch->pci_address_space, MCH_HOST_BRIDGE_SMRAM_C_BASE,
@@ -634,15 +660,6 @@ static void mch_realize(PCIDevice *d, Error **errp)
    object_property_add_const_link(qdev_get_machine(), "smram",
                                   OBJECT(&mch->smram));
}

uint64_t mch_mcfg_base(void)


@@ -15,7 +15,6 @@
#include "sysemu/kvm.h"
#include "migration/blocker.h"
#include "exec/confidential-guest-support.h"

#define TYPE_PEF_GUEST "pef-guest"
OBJECT_DECLARE_SIMPLE_TYPE(PefGuest, PEF_GUEST)
@@ -93,7 +92,7 @@ static int kvmppc_svm_off(Error **errp)
#endif
}

static int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
{
    if (!object_dynamic_cast(OBJECT(cgs), TYPE_PEF_GUEST)) {
        return 0;
@@ -107,7 +106,7 @@ int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
    return kvmppc_svm_init(cgs, errp);
}

static int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error **errp)
{
    if (!object_dynamic_cast(OBJECT(cgs), TYPE_PEF_GUEST)) {
        return 0;
@@ -131,6 +130,10 @@ OBJECT_DEFINE_TYPE_WITH_INTERFACES(PefGuest,
static void pef_guest_class_init(ObjectClass *oc, void *data)
{
    ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc);

    klass->kvm_init = pef_kvm_init;
    klass->kvm_reset = pef_kvm_reset;
}

static void pef_guest_init(Object *obj)


@@ -74,6 +74,7 @@
#include "hw/virtio/vhost-scsi-common.h"

#include "exec/ram_addr.h"
#include "exec/confidential-guest-support.h"
#include "hw/usb.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
@@ -86,7 +87,6 @@
#include "hw/ppc/spapr_tpm_proxy.h"
#include "hw/ppc/spapr_nvdimm.h"
#include "hw/ppc/spapr_numa.h"
#include "monitor/monitor.h"
@@ -1687,7 +1687,9 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason)
        qemu_guest_getrandom_nofail(spapr->fdt_rng_seed, 32);
    }

    if (machine->cgs) {
        confidential_guest_kvm_reset(machine->cgs, &error_fatal);
    }
    spapr_caps_apply(spapr);

    first_ppc_cpu = POWERPC_CPU(first_cpu);
@@ -2811,7 +2813,9 @@ static void spapr_machine_init(MachineState *machine)
    /*
     * if Secure VM (PEF) support is configured, then initialize it
     */
    if (machine->cgs) {
        confidential_guest_kvm_init(machine->cgs, &error_fatal);
    }

    msi_nonbroken = true;


@@ -14,6 +14,7 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "exec/ram_addr.h"
#include "exec/confidential-guest-support.h"
#include "hw/s390x/s390-virtio-hcall.h"
#include "hw/s390x/sclp.h"
#include "hw/s390x/s390_flic.h"
@@ -260,7 +261,9 @@ static void ccw_init(MachineState *machine)
    s390_init_cpus(machine);

    /* Need CPU model to be determined before we can set up PV */
    if (machine->cgs) {
        confidential_guest_kvm_init(machine->cgs, &error_fatal);
    }

    s390_flic_init();


@@ -23,7 +23,10 @@
#include "qom/object.h"

#define TYPE_CONFIDENTIAL_GUEST_SUPPORT "confidential-guest-support"
OBJECT_DECLARE_TYPE(ConfidentialGuestSupport,
                    ConfidentialGuestSupportClass,
                    CONFIDENTIAL_GUEST_SUPPORT)

struct ConfidentialGuestSupport {
    Object parent;
@@ -55,8 +58,37 @@ struct ConfidentialGuestSupport {

typedef struct ConfidentialGuestSupportClass {
    ObjectClass parent;

    int (*kvm_init)(ConfidentialGuestSupport *cgs, Error **errp);
    int (*kvm_reset)(ConfidentialGuestSupport *cgs, Error **errp);
} ConfidentialGuestSupportClass;

static inline int confidential_guest_kvm_init(ConfidentialGuestSupport *cgs,
                                              Error **errp)
{
    ConfidentialGuestSupportClass *klass;

    klass = CONFIDENTIAL_GUEST_SUPPORT_GET_CLASS(cgs);
    if (klass->kvm_init) {
        return klass->kvm_init(cgs, errp);
    }

    return 0;
}

static inline int confidential_guest_kvm_reset(ConfidentialGuestSupport *cgs,
                                               Error **errp)
{
    ConfidentialGuestSupportClass *klass;

    klass = CONFIDENTIAL_GUEST_SUPPORT_GET_CLASS(cgs);
    if (klass->kvm_reset) {
        return klass->kvm_reset(cgs, errp);
    }

    return 0;
}

#endif /* !CONFIG_USER_ONLY */

#endif /* QEMU_CONFIDENTIAL_GUEST_SUPPORT_H */


@@ -166,6 +166,8 @@ typedef int (RAMBlockIterFunc)(RAMBlock *rb, void *opaque);
int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque);
int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length);
int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start,
                                        size_t length);

#endif


@@ -243,6 +243,9 @@ typedef struct IOMMUTLBEvent {
/* RAM FD is opened read-only */
#define RAM_READONLY_FD (1 << 11)

/* RAM can be private that has kvm guest memfd backend */
#define RAM_GUEST_MEMFD (1 << 12)

static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn,
                                       IOMMUNotifierFlag flags,
                                       hwaddr start, hwaddr end,
@@ -1307,7 +1310,8 @@ bool memory_region_init_ram_nomigrate(MemoryRegion *mr,
 * @name: Region name, becomes part of RAMBlock name used in migration stream
 *        must be unique within any device
 * @size: size of the region.
 * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_NORESERVE,
 *             RAM_GUEST_MEMFD.
 * @errp: pointer to Error*, to store an error if it happens.
 *
 * Note that this function does not do anything to cause the data in the
@@ -1369,7 +1373,7 @@ bool memory_region_init_resizeable_ram(MemoryRegion *mr,
 *              (getpagesize()) will be used.
 * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
 *             RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
 *             RAM_READONLY_FD, RAM_GUEST_MEMFD
 * @path: the path in which to allocate the RAM.
 * @offset: offset within the file referenced by path
 * @errp: pointer to Error*, to store an error if it happens.
@@ -1399,7 +1403,7 @@ bool memory_region_init_ram_from_file(MemoryRegion *mr,
 * @size: size of the region.
 * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
 *             RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
 *             RAM_READONLY_FD, RAM_GUEST_MEMFD
 * @fd: the fd to mmap.
 * @offset: offset within the file referenced by fd
 * @errp: pointer to Error*, to store an error if it happens.
@@ -1599,6 +1603,12 @@ bool memory_region_init_ram(MemoryRegion *mr,
                            uint64_t size,
                            Error **errp);

bool memory_region_init_ram_guest_memfd(MemoryRegion *mr,
                                        Object *owner,
                                        const char *name,
                                        uint64_t size,
                                        Error **errp);

/**
 * memory_region_init_rom: Initialize a ROM memory region.
 *
@@ -1722,6 +1732,16 @@ static inline bool memory_region_is_romd(MemoryRegion *mr)
 */
bool memory_region_is_protected(MemoryRegion *mr);

/**
 * memory_region_has_guest_memfd: check whether a memory region has guest_memfd
 *     associated
 *
 * Returns %true if a memory region's ram_block has a valid guest_memfd assigned.
 *
 * @mr: the memory region being queried
 */
bool memory_region_has_guest_memfd(MemoryRegion *mr);

/**
 * memory_region_get_iommu: check whether a memory region is an iommu
 *


@@ -109,7 +109,7 @@ long qemu_maxrampagesize(void);
 * @mr: the memory region where the ram block is
 * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
 *             RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
 *             RAM_READONLY_FD, RAM_GUEST_MEMFD
 * @mem_path or @fd: specify the backing file or device
 * @offset: Offset into target file
 * @errp: pointer to Error*, to store an error if it happens


@@ -41,6 +41,7 @@ struct RAMBlock {
    QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers;
    int fd;
    uint64_t fd_offset;
    int guest_memfd;
    size_t page_size;
    /* dirty bitmap used during migration */
    unsigned long *bmap;


@@ -36,6 +36,7 @@ bool machine_usb(MachineState *machine);
int machine_phandle_start(MachineState *machine);
bool machine_dump_guest_core(MachineState *machine);
bool machine_mem_merge(MachineState *machine);
bool machine_require_guest_memfd(MachineState *machine);
HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine);
void machine_set_cpu_numa_node(MachineState *machine,
                               const CpuInstanceProperties *props,
@@ -370,6 +371,7 @@ struct MachineState {
    char *dt_compatible;
    bool dump_guest_core;
    bool mem_merge;
    bool require_guest_memfd;
    bool usb;
    bool usb_disabled;
    char *firmware;


@@ -161,6 +161,7 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level);
#define PCI_HOST_PROP_PCI_HOLE64_SIZE "pci-hole64-size"
#define PCI_HOST_BELOW_4G_MEM_SIZE "below-4g-mem-size"
#define PCI_HOST_ABOVE_4G_MEM_SIZE "above-4g-mem-size"
#define PCI_HOST_PROP_SMM_RANGES "smm-ranges"
void pc_pci_as_mapping_init(MemoryRegion *system_memory,

include/hw/i386/tdvf.h Normal file

@@ -0,0 +1,58 @@
/*
* SPDX-License-Identifier: GPL-2.0-or-later
* Copyright (c) 2020 Intel Corporation
* Author: Isaku Yamahata <isaku.yamahata at gmail.com>
* <isaku.yamahata at intel.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License along
* with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef HW_I386_TDVF_H
#define HW_I386_TDVF_H
#include "qemu/osdep.h"
#define TDVF_SECTION_TYPE_BFV 0
#define TDVF_SECTION_TYPE_CFV 1
#define TDVF_SECTION_TYPE_TD_HOB 2
#define TDVF_SECTION_TYPE_TEMP_MEM 3
#define TDVF_SECTION_ATTRIBUTES_MR_EXTEND (1U << 0)
#define TDVF_SECTION_ATTRIBUTES_PAGE_AUG (1U << 1)
typedef struct TdxFirmwareEntry {
uint32_t data_offset;
uint32_t data_len;
uint64_t address;
uint64_t size;
uint32_t type;
uint32_t attributes;
void *mem_ptr;
} TdxFirmwareEntry;
typedef struct TdxFirmware {
void *mem_ptr;
uint32_t nr_entries;
TdxFirmwareEntry *entries;
} TdxFirmware;
#define for_each_tdx_fw_entry(fw, e) \
for (e = (fw)->entries; e != (fw)->entries + (fw)->nr_entries; e++)
int tdvf_parse_metadata(TdxFirmware *fw, void *flash_ptr, int size);
#endif /* HW_I386_TDVF_H */
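For illustration, a minimal consumer of this header could parse the firmware image and then walk the resulting sections with the iteration macro. Only tdvf_parse_metadata() and the types above are from this series; the dump function and its printf output are hypothetical and assume <stdio.h>/<inttypes.h>:

/* Sketch: list every TDVF section after tdvf_parse_metadata() succeeded. */
static void tdvf_dump_entries(TdxFirmware *fw)
{
    TdxFirmwareEntry *entry;

    for_each_tdx_fw_entry(fw, entry) {
        printf("section type %u: gpa 0x%" PRIx64 " size 0x%" PRIx64 "\n",
               entry->type, entry->address, entry->size);
    }
}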


@@ -43,6 +43,7 @@ struct X86MachineState {
MachineState parent;
/*< public >*/
unsigned int vm_type;
/* Pointers to devices and objects: */
ISADevice *rtc;
@@ -59,6 +60,7 @@ struct X86MachineState {
uint64_t above_4g_mem_start;
/* CPU and apic information: */
bool eoi_intercept_unsupported;
unsigned pci_irq_mask;
unsigned apic_id_limit;
uint16_t boot_cpus;


@@ -50,6 +50,7 @@ struct MCHPCIState {
MemoryRegion tseg_blackhole, tseg_window;
MemoryRegion smbase_blackhole, smbase_window;
bool has_smram_at_smbase;
bool has_smm_ranges;
Range pci_hole;
uint64_t below_4g_mem_size;
uint64_t above_4g_mem_size;


@@ -1,17 +0,0 @@
/*
* PEF (Protected Execution Facility) for POWER support
*
* Copyright Red Hat.
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*
*/
#ifndef HW_PPC_PEF_H
#define HW_PPC_PEF_H
int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp);
int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error **errp);
#endif /* HW_PPC_PEF_H */


@@ -53,7 +53,7 @@ extern "C" {
* Format modifiers may change any property of the buffer, including the number
* of planes and/or the required allocation size. Format modifiers are
* vendor-namespaced, and as such the relationship between a fourcc code and a
-* modifier is specific to the modifer being used. For example, some modifiers
+* modifier is specific to the modifier being used. For example, some modifiers
* may preserve meaning - such as number of planes - from the fourcc code,
* whereas others may not.
*
@@ -78,7 +78,7 @@ extern "C" {
* format.
* - Higher-level programs interfacing with KMS/GBM/EGL/Vulkan/etc: these users
* see modifiers as opaque tokens they can check for equality and intersect.
-* These users musn't need to know to reason about the modifier value
+* These users mustn't need to know to reason about the modifier value
* (i.e. they are not expected to extract information out of the modifier).
*
* Vendors should document their modifier usage in as much detail as
@@ -539,7 +539,7 @@ extern "C" {
* This is a tiled layout using 4Kb tiles in row-major layout.
* Within the tile pixels are laid out in 16 256 byte units / sub-tiles which
* are arranged in four groups (two wide, two high) with column-major layout.
-* Each group therefore consits out of four 256 byte units, which are also laid
+* Each group therefore consists out of four 256 byte units, which are also laid
* out as 2x2 column-major.
* 256 byte units are made out of four 64 byte blocks of pixels, producing
* either a square block or a 2:1 unit.
@@ -1102,7 +1102,7 @@ drm_fourcc_canonicalize_nvidia_format_mod(uint64_t modifier)
*/
/*
-* The top 4 bits (out of the 56 bits alloted for specifying vendor specific
+* The top 4 bits (out of the 56 bits allotted for specifying vendor specific
* modifiers) denote the category for modifiers. Currently we have three
* categories of modifiers ie AFBC, MISC and AFRC. We can have a maximum of
* sixteen different categories.
@@ -1418,7 +1418,7 @@ drm_fourcc_canonicalize_nvidia_format_mod(uint64_t modifier)
* Amlogic FBC Memory Saving mode
*
* Indicates the storage is packed when pixel size is multiple of word
-* boudaries, i.e. 8bit should be stored in this mode to save allocation
+* boundaries, i.e. 8bit should be stored in this mode to save allocation
* memory.
*
* This mode reduces body layout to 3072 bytes per 64x32 superblock with


@@ -1266,6 +1266,8 @@ struct ethtool_rxfh_indir {
* hardware hash key.
* @hfunc: Defines the current RSS hash function used by HW (or to be set to).
* Valid values are one of the %ETH_RSS_HASH_*.
* @input_xfrm: Defines how the input data is transformed. Valid values are one
* of %RXH_XFRM_*.
* @rsvd8: Reserved for future use; see the note on reserved space.
* @rsvd32: Reserved for future use; see the note on reserved space.
* @rss_config: RX ring/queue index for each hash value i.e., indirection table
@@ -1285,7 +1287,8 @@ struct ethtool_rxfh {
uint32_t indir_size;
uint32_t key_size;
uint8_t hfunc;
-uint8_t rsvd8[3];
+uint8_t input_xfrm;
+uint8_t rsvd8[2];
uint32_t rsvd32;
uint32_t rss_config[];
};
@@ -1992,6 +1995,15 @@ static inline int ethtool_validate_duplex(uint8_t duplex)
#define WOL_MODE_COUNT 8
/* RSS hash function data
* XOR the corresponding source and destination fields of each specified
* protocol. Both copies of the XOR'ed fields are fed into the RSS and RXHASH
* calculation. Note that this XORing reduces the input set entropy and could
* be exploited to reduce the RSS queue spread.
*/
#define RXH_XFRM_SYM_XOR (1 << 0)
#define RXH_XFRM_NO_CHANGE 0xff
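For intuition only (the real transform is computed by hardware, not by this sketch): XORing the source and destination fields before hashing makes the RSS hash symmetric, so both directions of one flow land on the same queue:

/* Illustrative: with RXH_XFRM_SYM_XOR the hash input is built from the
 * XORed fields, so fold(src, dst) == fold(dst, src) and both packet
 * directions of a connection select the same RX queue. */
static uint32_t rxh_sym_xor_fold(uint32_t saddr, uint32_t daddr)
{
    return saddr ^ daddr;
}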
/* L2-L4 network traffic flow types */
#define TCP_V4_FLOW 0x01 /* hash or spec (tcp_ip4_spec) */
#define UDP_V4_FLOW 0x02 /* hash or spec (udp_ip4_spec) */
@@ -2128,18 +2140,6 @@ enum ethtool_reset_flags {
* refused. For drivers: ignore this field (use kernel's
* __ETHTOOL_LINK_MODE_MASK_NBITS instead), any change to it will
* be overwritten by kernel.
* @supported: Bitmap with each bit meaning given by
* %ethtool_link_mode_bit_indices for the link modes, physical
* connectors and other link features for which the interface
* supports autonegotiation or auto-detection. Read-only.
* @advertising: Bitmap with each bit meaning given by
* %ethtool_link_mode_bit_indices for the link modes, physical
* connectors and other link features that are advertised through
* autonegotiation or enabled for auto-detection.
* @lp_advertising: Bitmap with each bit meaning given by
* %ethtool_link_mode_bit_indices for the link modes, and other
* link features that the link partner advertised through
* autonegotiation; 0 if unknown or not applicable. Read-only.
* @transceiver: Used to distinguish different possible PHY types,
* reported consistently by PHYLIB. Read-only.
* @master_slave_cfg: Master/slave port mode.
@@ -2181,6 +2181,21 @@ enum ethtool_reset_flags {
* %set_link_ksettings() should validate all fields other than @cmd
* and @link_mode_masks_nwords that are not described as read-only or
* deprecated, and must ignore all fields described as read-only.
*
* @link_mode_masks is divided into three bitfields, each of length
* @link_mode_masks_nwords:
* - supported: Bitmap with each bit meaning given by
* %ethtool_link_mode_bit_indices for the link modes, physical
* connectors and other link features for which the interface
* supports autonegotiation or auto-detection. Read-only.
* - advertising: Bitmap with each bit meaning given by
* %ethtool_link_mode_bit_indices for the link modes, physical
* connectors and other link features that are advertised through
* autonegotiation or enabled for auto-detection.
* - lp_advertising: Bitmap with each bit meaning given by
* %ethtool_link_mode_bit_indices for the link modes, and other
* link features that the link partner advertised through
* autonegotiation; 0 if unknown or not applicable. Read-only.
*/
struct ethtool_link_settings {
uint32_t cmd;


@@ -52,7 +52,7 @@
* rest are per-device feature bits.
*/
#define VIRTIO_TRANSPORT_F_START 28
-#define VIRTIO_TRANSPORT_F_END 41
+#define VIRTIO_TRANSPORT_F_END 42
#ifndef VIRTIO_CONFIG_NO_LEGACY
/* Do we get callbacks when the ring is completely used, even if we've
@@ -112,4 +112,10 @@
* This feature indicates that the driver can reset a queue individually.
*/
#define VIRTIO_F_RING_RESET 40
/*
* This feature indicates that the device supports administration virtqueues.
*/
#define VIRTIO_F_ADMIN_VQ 41
#endif /* _LINUX_VIRTIO_CONFIG_H */


@@ -175,6 +175,9 @@ struct virtio_pci_modern_common_cfg {
uint16_t queue_notify_data; /* read-write */
uint16_t queue_reset; /* read-write */
uint16_t admin_queue_index; /* read-only */
uint16_t admin_queue_num; /* read-only */
};
/* Fields in VIRTIO_PCI_CAP_PCI_CFG: */
@@ -215,7 +218,72 @@ struct virtio_pci_cfg_cap {
#define VIRTIO_PCI_COMMON_Q_USEDHI 52
#define VIRTIO_PCI_COMMON_Q_NDATA 56
#define VIRTIO_PCI_COMMON_Q_RESET 58
#define VIRTIO_PCI_COMMON_ADM_Q_IDX 60
#define VIRTIO_PCI_COMMON_ADM_Q_NUM 62
#endif /* VIRTIO_PCI_NO_MODERN */
/* Admin command status. */
#define VIRTIO_ADMIN_STATUS_OK 0
/* Admin command opcode. */
#define VIRTIO_ADMIN_CMD_LIST_QUERY 0x0
#define VIRTIO_ADMIN_CMD_LIST_USE 0x1
/* Admin command group type. */
#define VIRTIO_ADMIN_GROUP_TYPE_SRIOV 0x1
/* Transitional device admin command. */
#define VIRTIO_ADMIN_CMD_LEGACY_COMMON_CFG_WRITE 0x2
#define VIRTIO_ADMIN_CMD_LEGACY_COMMON_CFG_READ 0x3
#define VIRTIO_ADMIN_CMD_LEGACY_DEV_CFG_WRITE 0x4
#define VIRTIO_ADMIN_CMD_LEGACY_DEV_CFG_READ 0x5
#define VIRTIO_ADMIN_CMD_LEGACY_NOTIFY_INFO 0x6
struct QEMU_PACKED virtio_admin_cmd_hdr {
uint16_t opcode;
/*
* 1 - SR-IOV
* 2-65535 - reserved
*/
uint16_t group_type;
/* Unused, reserved for future extensions. */
uint8_t reserved1[12];
uint64_t group_member_id;
};
struct QEMU_PACKED virtio_admin_cmd_status {
uint16_t status;
uint16_t status_qualifier;
/* Unused, reserved for future extensions. */
uint8_t reserved2[4];
};
struct QEMU_PACKED virtio_admin_cmd_legacy_wr_data {
uint8_t offset; /* Starting offset of the register(s) to write. */
uint8_t reserved[7];
uint8_t registers[];
};
struct QEMU_PACKED virtio_admin_cmd_legacy_rd_data {
uint8_t offset; /* Starting offset of the register(s) to read. */
};
#define VIRTIO_ADMIN_CMD_NOTIFY_INFO_FLAGS_END 0
#define VIRTIO_ADMIN_CMD_NOTIFY_INFO_FLAGS_OWNER_DEV 0x1
#define VIRTIO_ADMIN_CMD_NOTIFY_INFO_FLAGS_OWNER_MEM 0x2
#define VIRTIO_ADMIN_CMD_MAX_NOTIFY_INFO 4
struct QEMU_PACKED virtio_admin_cmd_notify_info_data {
uint8_t flags; /* 0 = end of list, 1 = owner device, 2 = member device */
uint8_t bar; /* BAR of the member or the owner device */
uint8_t padding[6];
uint64_t offset; /* Offset within bar. */
};
struct virtio_admin_cmd_notify_info_result {
struct virtio_admin_cmd_notify_info_data entries[VIRTIO_ADMIN_CMD_MAX_NOTIFY_INFO];
};
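A sketch of how these pieces compose on the owner side; buffer layout only, the admin-virtqueue submission itself is device-specific and omitted, and the helper assumes <string.h>:

/* Sketch: fill a VIRTIO_ADMIN_CMD_LEGACY_COMMON_CFG_WRITE request for
 * SR-IOV member vf_id. hdr and data go in device-readable buffers,
 * a struct virtio_admin_cmd_status in a device-writable one. */
static void fill_legacy_cfg_write(struct virtio_admin_cmd_hdr *hdr,
                                  struct virtio_admin_cmd_legacy_wr_data *data,
                                  uint64_t vf_id, uint8_t offset)
{
    memset(hdr, 0, sizeof(*hdr));
    hdr->opcode = VIRTIO_ADMIN_CMD_LEGACY_COMMON_CFG_WRITE;
    hdr->group_type = VIRTIO_ADMIN_GROUP_TYPE_SRIOV;
    hdr->group_member_id = vf_id;
    memset(data, 0, sizeof(*data));
    data->offset = offset; /* registers[] payload follows in the buffer */
}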
#endif


@@ -14,6 +14,13 @@
#include "standard-headers/linux/virtio_ids.h" #include "standard-headers/linux/virtio_ids.h"
#include "standard-headers/linux/virtio_config.h" #include "standard-headers/linux/virtio_config.h"
/* Feature bits */
/* guest physical address range will be indicated as shared memory region 0 */
#define VIRTIO_PMEM_F_SHMEM_REGION 0
/* shmid of the shared memory region corresponding to the pmem */
#define VIRTIO_PMEM_SHMEM_REGION_ID 0
struct virtio_pmem_config { struct virtio_pmem_config {
uint64_t start; uint64_t start;
uint64_t size; uint64_t size;


@@ -0,0 +1,198 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* Author: Isaku Yamahata <isaku.yamahata at gmail.com>
* <isaku.yamahata at intel.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License along
* with this program; if not, see <http://www.gnu.org/licenses/>.
*
*/
#ifndef HW_I386_UEFI_H
#define HW_I386_UEFI_H
/***************************************************************************/
/*
* basic EFI definitions
* supplemented with UEFI Specification Version 2.8 (Errata A)
* released February 2020
*/
/* UEFI integer is little endian */
typedef struct {
uint32_t Data1;
uint16_t Data2;
uint16_t Data3;
uint8_t Data4[8];
} EFI_GUID;
typedef enum {
EfiReservedMemoryType,
EfiLoaderCode,
EfiLoaderData,
EfiBootServicesCode,
EfiBootServicesData,
EfiRuntimeServicesCode,
EfiRuntimeServicesData,
EfiConventionalMemory,
EfiUnusableMemory,
EfiACPIReclaimMemory,
EfiACPIMemoryNVS,
EfiMemoryMappedIO,
EfiMemoryMappedIOPortSpace,
EfiPalCode,
EfiPersistentMemory,
EfiUnacceptedMemoryType,
EfiMaxMemoryType
} EFI_MEMORY_TYPE;
#define EFI_HOB_HANDOFF_TABLE_VERSION 0x0009
#define EFI_HOB_TYPE_HANDOFF 0x0001
#define EFI_HOB_TYPE_MEMORY_ALLOCATION 0x0002
#define EFI_HOB_TYPE_RESOURCE_DESCRIPTOR 0x0003
#define EFI_HOB_TYPE_GUID_EXTENSION 0x0004
#define EFI_HOB_TYPE_FV 0x0005
#define EFI_HOB_TYPE_CPU 0x0006
#define EFI_HOB_TYPE_MEMORY_POOL 0x0007
#define EFI_HOB_TYPE_FV2 0x0009
#define EFI_HOB_TYPE_LOAD_PEIM_UNUSED 0x000A
#define EFI_HOB_TYPE_UEFI_CAPSULE 0x000B
#define EFI_HOB_TYPE_FV3 0x000C
#define EFI_HOB_TYPE_UNUSED 0xFFFE
#define EFI_HOB_TYPE_END_OF_HOB_LIST 0xFFFF
typedef struct {
uint16_t HobType;
uint16_t HobLength;
uint32_t Reserved;
} EFI_HOB_GENERIC_HEADER;
typedef uint64_t EFI_PHYSICAL_ADDRESS;
typedef uint32_t EFI_BOOT_MODE;
typedef struct {
EFI_HOB_GENERIC_HEADER Header;
uint32_t Version;
EFI_BOOT_MODE BootMode;
EFI_PHYSICAL_ADDRESS EfiMemoryTop;
EFI_PHYSICAL_ADDRESS EfiMemoryBottom;
EFI_PHYSICAL_ADDRESS EfiFreeMemoryTop;
EFI_PHYSICAL_ADDRESS EfiFreeMemoryBottom;
EFI_PHYSICAL_ADDRESS EfiEndOfHobList;
} EFI_HOB_HANDOFF_INFO_TABLE;
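A sketch of walking a HOB list built from these definitions (the walker itself is hypothetical; it simply follows HobLength from header to header until the end-of-list marker):

/* Sketch: iterate all HOBs starting at the handoff info table. */
static void hob_walk(EFI_HOB_HANDOFF_INFO_TABLE *hit)
{
    EFI_HOB_GENERIC_HEADER *hob = &hit->Header;

    while (hob->HobType != EFI_HOB_TYPE_END_OF_HOB_LIST) {
        if (hob->HobType == EFI_HOB_TYPE_RESOURCE_DESCRIPTOR) {
            EFI_HOB_RESOURCE_DESCRIPTOR *res = (void *)hob;
            /* res->PhysicalStart .. +res->ResourceLength is one range */
            (void)res;
        }
        hob = (EFI_HOB_GENERIC_HEADER *)((uint8_t *)hob + hob->HobLength);
    }
}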
#define EFI_RESOURCE_SYSTEM_MEMORY 0x00000000
#define EFI_RESOURCE_MEMORY_MAPPED_IO 0x00000001
#define EFI_RESOURCE_IO 0x00000002
#define EFI_RESOURCE_FIRMWARE_DEVICE 0x00000003
#define EFI_RESOURCE_MEMORY_MAPPED_IO_PORT 0x00000004
#define EFI_RESOURCE_MEMORY_RESERVED 0x00000005
#define EFI_RESOURCE_IO_RESERVED 0x00000006
#define EFI_RESOURCE_MEMORY_UNACCEPTED 0x00000007
#define EFI_RESOURCE_MAX_MEMORY_TYPE 0x00000008
#define EFI_RESOURCE_ATTRIBUTE_PRESENT 0x00000001
#define EFI_RESOURCE_ATTRIBUTE_INITIALIZED 0x00000002
#define EFI_RESOURCE_ATTRIBUTE_TESTED 0x00000004
#define EFI_RESOURCE_ATTRIBUTE_SINGLE_BIT_ECC 0x00000008
#define EFI_RESOURCE_ATTRIBUTE_MULTIPLE_BIT_ECC 0x00000010
#define EFI_RESOURCE_ATTRIBUTE_ECC_RESERVED_1 0x00000020
#define EFI_RESOURCE_ATTRIBUTE_ECC_RESERVED_2 0x00000040
#define EFI_RESOURCE_ATTRIBUTE_READ_PROTECTED 0x00000080
#define EFI_RESOURCE_ATTRIBUTE_WRITE_PROTECTED 0x00000100
#define EFI_RESOURCE_ATTRIBUTE_EXECUTION_PROTECTED 0x00000200
#define EFI_RESOURCE_ATTRIBUTE_UNCACHEABLE 0x00000400
#define EFI_RESOURCE_ATTRIBUTE_WRITE_COMBINEABLE 0x00000800
#define EFI_RESOURCE_ATTRIBUTE_WRITE_THROUGH_CACHEABLE 0x00001000
#define EFI_RESOURCE_ATTRIBUTE_WRITE_BACK_CACHEABLE 0x00002000
#define EFI_RESOURCE_ATTRIBUTE_16_BIT_IO 0x00004000
#define EFI_RESOURCE_ATTRIBUTE_32_BIT_IO 0x00008000
#define EFI_RESOURCE_ATTRIBUTE_64_BIT_IO 0x00010000
#define EFI_RESOURCE_ATTRIBUTE_UNCACHED_EXPORTED 0x00020000
#define EFI_RESOURCE_ATTRIBUTE_READ_ONLY_PROTECTED 0x00040000
#define EFI_RESOURCE_ATTRIBUTE_READ_ONLY_PROTECTABLE 0x00080000
#define EFI_RESOURCE_ATTRIBUTE_READ_PROTECTABLE 0x00100000
#define EFI_RESOURCE_ATTRIBUTE_WRITE_PROTECTABLE 0x00200000
#define EFI_RESOURCE_ATTRIBUTE_EXECUTION_PROTECTABLE 0x00400000
#define EFI_RESOURCE_ATTRIBUTE_PERSISTENT 0x00800000
#define EFI_RESOURCE_ATTRIBUTE_PERSISTABLE 0x01000000
#define EFI_RESOURCE_ATTRIBUTE_MORE_RELIABLE 0x02000000
typedef uint32_t EFI_RESOURCE_TYPE;
typedef uint32_t EFI_RESOURCE_ATTRIBUTE_TYPE;
typedef struct {
EFI_HOB_GENERIC_HEADER Header;
EFI_GUID Owner;
EFI_RESOURCE_TYPE ResourceType;
EFI_RESOURCE_ATTRIBUTE_TYPE ResourceAttribute;
EFI_PHYSICAL_ADDRESS PhysicalStart;
uint64_t ResourceLength;
} EFI_HOB_RESOURCE_DESCRIPTOR;
typedef struct {
EFI_HOB_GENERIC_HEADER Header;
EFI_GUID Name;
/* guid specific data follows */
} EFI_HOB_GUID_TYPE;
typedef struct {
EFI_HOB_GENERIC_HEADER Header;
EFI_PHYSICAL_ADDRESS BaseAddress;
uint64_t Length;
} EFI_HOB_FIRMWARE_VOLUME;
typedef struct {
EFI_HOB_GENERIC_HEADER Header;
EFI_PHYSICAL_ADDRESS BaseAddress;
uint64_t Length;
EFI_GUID FvName;
EFI_GUID FileName;
} EFI_HOB_FIRMWARE_VOLUME2;
typedef struct {
EFI_HOB_GENERIC_HEADER Header;
EFI_PHYSICAL_ADDRESS BaseAddress;
uint64_t Length;
uint32_t AuthenticationStatus;
bool ExtractedFv;
EFI_GUID FvName;
EFI_GUID FileName;
} EFI_HOB_FIRMWARE_VOLUME3;
typedef struct {
EFI_HOB_GENERIC_HEADER Header;
uint8_t SizeOfMemorySpace;
uint8_t SizeOfIoSpace;
uint8_t Reserved[6];
} EFI_HOB_CPU;
typedef struct {
EFI_HOB_GENERIC_HEADER Header;
} EFI_HOB_MEMORY_POOL;
typedef struct {
EFI_HOB_GENERIC_HEADER Header;
EFI_PHYSICAL_ADDRESS BaseAddress;
uint64_t Length;
} EFI_HOB_UEFI_CAPSULE;
#define EFI_HOB_OWNER_ZERO \
((EFI_GUID){ 0x00000000, 0x0000, 0x0000, \
{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } })
#endif


@@ -74,6 +74,7 @@ struct HostMemoryBackend {
uint64_t size;
bool merge, dump, use_canonical_path;
bool prealloc, is_mapped, share, reserve;
bool guest_memfd;
uint32_t prealloc_threads;
ThreadContext *prealloc_context;
DECLARE_BITMAP(host_nodes, MAX_NODES + 1);


@@ -341,6 +341,7 @@ int kvm_arch_get_default_type(MachineState *ms);
int kvm_arch_init(MachineState *ms, KVMState *s);
int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp);
int kvm_arch_init_vcpu(CPUState *cpu);
int kvm_arch_destroy_vcpu(CPUState *cpu);
@@ -544,4 +545,11 @@ uint32_t kvm_dirty_ring_size(void);
* reported for the VM.
*/
bool kvm_hwpoisoned_mem(void);
int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp);
int kvm_set_memory_attributes_private(hwaddr start, hwaddr size);
int kvm_set_memory_attributes_shared(hwaddr start, hwaddr size);
int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private);
#endif


@@ -30,6 +30,8 @@ typedef struct KVMSlot
int as_id;
/* Cache of the offset in ram address space */
ram_addr_t ram_start_offset;
int guest_memfd;
hwaddr guest_memfd_offset;
} KVMSlot;
typedef struct KVMMemoryUpdate {


@@ -829,8 +829,21 @@ __SYSCALL(__NR_futex_wait, sys_futex_wait)
#define __NR_futex_requeue 456
__SYSCALL(__NR_futex_requeue, sys_futex_requeue)
#define __NR_statmount 457
__SYSCALL(__NR_statmount, sys_statmount)
#define __NR_listmount 458
__SYSCALL(__NR_listmount, sys_listmount)
#define __NR_lsm_get_self_attr 459
__SYSCALL(__NR_lsm_get_self_attr, sys_lsm_get_self_attr)
#define __NR_lsm_set_self_attr 460
__SYSCALL(__NR_lsm_set_self_attr, sys_lsm_set_self_attr)
#define __NR_lsm_list_modules 461
__SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules)
#undef __NR_syscalls
-#define __NR_syscalls 457
+#define __NR_syscalls 462
/*
* 32 bit systems traditionally used different


@@ -88,7 +88,7 @@
#define MADV_HUGEPAGE 14 /* Worth backing with hugepages */
#define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages */
-#define MADV_DONTDUMP 16 /* Explicity exclude from the core dump,
+#define MADV_DONTDUMP 16 /* Explicitly exclude from core dump,
overrides the coredump filter bits */
#define MADV_DODUMP 17 /* Clear the MADV_NODUMP flag */


@@ -385,5 +385,10 @@
#define __NR_futex_wake (__NR_Linux + 454)
#define __NR_futex_wait (__NR_Linux + 455)
#define __NR_futex_requeue (__NR_Linux + 456)
#define __NR_statmount (__NR_Linux + 457)
#define __NR_listmount (__NR_Linux + 458)
#define __NR_lsm_get_self_attr (__NR_Linux + 459)
#define __NR_lsm_set_self_attr (__NR_Linux + 460)
#define __NR_lsm_list_modules (__NR_Linux + 461)
#endif /* _ASM_UNISTD_N32_H */


@@ -361,5 +361,10 @@
#define __NR_futex_wake (__NR_Linux + 454)
#define __NR_futex_wait (__NR_Linux + 455)
#define __NR_futex_requeue (__NR_Linux + 456)
#define __NR_statmount (__NR_Linux + 457)
#define __NR_listmount (__NR_Linux + 458)
#define __NR_lsm_get_self_attr (__NR_Linux + 459)
#define __NR_lsm_set_self_attr (__NR_Linux + 460)
#define __NR_lsm_list_modules (__NR_Linux + 461)
#endif /* _ASM_UNISTD_N64_H */


@@ -431,5 +431,10 @@
#define __NR_futex_wake (__NR_Linux + 454)
#define __NR_futex_wait (__NR_Linux + 455)
#define __NR_futex_requeue (__NR_Linux + 456)
#define __NR_statmount (__NR_Linux + 457)
#define __NR_listmount (__NR_Linux + 458)
#define __NR_lsm_get_self_attr (__NR_Linux + 459)
#define __NR_lsm_set_self_attr (__NR_Linux + 460)
#define __NR_lsm_list_modules (__NR_Linux + 461)
#endif /* _ASM_UNISTD_O32_H */


@@ -438,6 +438,11 @@
#define __NR_futex_wake 454
#define __NR_futex_wait 455
#define __NR_futex_requeue 456
#define __NR_statmount 457
#define __NR_listmount 458
#define __NR_lsm_get_self_attr 459
#define __NR_lsm_set_self_attr 460
#define __NR_lsm_list_modules 461
#endif /* _ASM_UNISTD_32_H */


@@ -410,6 +410,11 @@
#define __NR_futex_wake 454
#define __NR_futex_wait 455
#define __NR_futex_requeue 456
#define __NR_statmount 457
#define __NR_listmount 458
#define __NR_lsm_get_self_attr 459
#define __NR_lsm_set_self_attr 460
#define __NR_lsm_list_modules 461
#endif /* _ASM_UNISTD_64_H */


@@ -139,6 +139,33 @@ enum KVM_RISCV_ISA_EXT_ID {
KVM_RISCV_ISA_EXT_ZIHPM,
KVM_RISCV_ISA_EXT_SMSTATEEN,
KVM_RISCV_ISA_EXT_ZICOND,
KVM_RISCV_ISA_EXT_ZBC,
KVM_RISCV_ISA_EXT_ZBKB,
KVM_RISCV_ISA_EXT_ZBKC,
KVM_RISCV_ISA_EXT_ZBKX,
KVM_RISCV_ISA_EXT_ZKND,
KVM_RISCV_ISA_EXT_ZKNE,
KVM_RISCV_ISA_EXT_ZKNH,
KVM_RISCV_ISA_EXT_ZKR,
KVM_RISCV_ISA_EXT_ZKSED,
KVM_RISCV_ISA_EXT_ZKSH,
KVM_RISCV_ISA_EXT_ZKT,
KVM_RISCV_ISA_EXT_ZVBB,
KVM_RISCV_ISA_EXT_ZVBC,
KVM_RISCV_ISA_EXT_ZVKB,
KVM_RISCV_ISA_EXT_ZVKG,
KVM_RISCV_ISA_EXT_ZVKNED,
KVM_RISCV_ISA_EXT_ZVKNHA,
KVM_RISCV_ISA_EXT_ZVKNHB,
KVM_RISCV_ISA_EXT_ZVKSED,
KVM_RISCV_ISA_EXT_ZVKSH,
KVM_RISCV_ISA_EXT_ZVKT,
KVM_RISCV_ISA_EXT_ZFH,
KVM_RISCV_ISA_EXT_ZFHMIN,
KVM_RISCV_ISA_EXT_ZIHINTNTL,
KVM_RISCV_ISA_EXT_ZVFH,
KVM_RISCV_ISA_EXT_ZVFHMIN,
KVM_RISCV_ISA_EXT_ZFA,
KVM_RISCV_ISA_EXT_MAX,
};
@@ -157,9 +184,16 @@ enum KVM_RISCV_SBI_EXT_ID {
KVM_RISCV_SBI_EXT_EXPERIMENTAL,
KVM_RISCV_SBI_EXT_VENDOR,
KVM_RISCV_SBI_EXT_DBCN,
KVM_RISCV_SBI_EXT_STA,
KVM_RISCV_SBI_EXT_MAX,
};
/* SBI STA extension registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
struct kvm_riscv_sbi_sta {
unsigned long shmem_lo;
unsigned long shmem_hi;
};
/* Possible states for kvm_riscv_timer */
#define KVM_RISCV_TIMER_STATE_OFF 0
#define KVM_RISCV_TIMER_STATE_ON 1
@@ -241,6 +275,12 @@ enum KVM_RISCV_SBI_EXT_ID {
#define KVM_REG_RISCV_VECTOR_REG(n) \
((n) + sizeof(struct __riscv_v_ext_state) / sizeof(unsigned long))
/* Registers for specific SBI extensions are mapped as type 10 */
#define KVM_REG_RISCV_SBI_STATE (0x0a << KVM_REG_RISCV_TYPE_SHIFT)
#define KVM_REG_RISCV_SBI_STA (0x0 << KVM_REG_RISCV_SUBTYPE_SHIFT)
#define KVM_REG_RISCV_SBI_STA_REG(name) \
(offsetof(struct kvm_riscv_sbi_sta, name) / sizeof(unsigned long))
/* Device Control API: RISC-V AIA */
#define KVM_DEV_RISCV_APLIC_ALIGN 0x1000
#define KVM_DEV_RISCV_APLIC_SIZE 0x4000


@@ -429,5 +429,10 @@
#define __NR_futex_wake 454
#define __NR_futex_wait 455
#define __NR_futex_requeue 456
#define __NR_statmount 457
#define __NR_listmount 458
#define __NR_lsm_get_self_attr 459
#define __NR_lsm_set_self_attr 460
#define __NR_lsm_list_modules 461
#endif /* _ASM_S390_UNISTD_32_H */


@@ -377,5 +377,10 @@
#define __NR_futex_wake 454
#define __NR_futex_wait 455
#define __NR_futex_requeue 456
#define __NR_statmount 457
#define __NR_listmount 458
#define __NR_lsm_get_self_attr 459
#define __NR_lsm_set_self_attr 460
#define __NR_lsm_list_modules 461
#endif /* _ASM_S390_UNISTD_64_H */


@@ -560,4 +560,93 @@ struct kvm_pmu_event_filter {
/* x86-specific KVM_EXIT_HYPERCALL flags. */
#define KVM_EXIT_HYPERCALL_LONG_MODE BIT(0)
#define KVM_X86_DEFAULT_VM 0
#define KVM_X86_SW_PROTECTED_VM 1
#define KVM_X86_TDX_VM 2
#define KVM_X86_SNP_VM 3
/* Trust Domain eXtension sub-ioctl() commands. */
enum kvm_tdx_cmd_id {
KVM_TDX_CAPABILITIES = 0,
KVM_TDX_INIT_VM,
KVM_TDX_INIT_VCPU,
KVM_TDX_EXTEND_MEMORY,
KVM_TDX_FINALIZE_VM,
KVM_TDX_CMD_NR_MAX,
};
struct kvm_tdx_cmd {
/* enum kvm_tdx_cmd_id */
__u32 id;
/* flags for sub-command. If sub-command doesn't use this, set zero. */
__u32 flags;
/*
* data for each sub-command. An immediate or a pointer to the actual
* data in process virtual address. If sub-command doesn't use it,
* set zero.
*/
__u64 data;
/*
* Auxiliary error code. The sub-command may return TDX SEAMCALL
* status code in addition to -Exxx.
* Defined for consistency with struct kvm_sev_cmd.
*/
__u64 error;
};
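A sketch of issuing one of these sub-commands from userspace. The assumption (labelled as such) is that struct kvm_tdx_cmd travels through the vendor-specific KVM_MEMORY_ENCRYPT_OP vm ioctl, mirroring how SEV commands are issued; <sys/ioctl.h> and <linux/kvm.h> are assumed:

/* Sketch: run a TDX sub-ioctl on the VM fd; the SEAMCALL status, if
 * any, comes back in cmd.error in addition to the ioctl return value. */
static int tdx_vm_ioctl_sketch(int vm_fd, __u32 id, __u32 flags, void *data)
{
    struct kvm_tdx_cmd cmd = {
        .id = id,
        .flags = flags,
        .data = (__u64)(unsigned long)data,
    };
    int r = ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd); /* assumed carrier */

    if (r == 0 && cmd.error) {
        r = -1; /* TDX SEAMCALL status reported in cmd.error */
    }
    return r;
}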
#define KVM_TDX_CPUID_NO_SUBLEAF ((__u32)-1)
struct kvm_tdx_cpuid_config {
__u32 leaf;
__u32 sub_leaf;
__u32 eax;
__u32 ebx;
__u32 ecx;
__u32 edx;
};
/* supported_gpaw */
#define TDX_CAP_GPAW_48 (1 << 0)
#define TDX_CAP_GPAW_52 (1 << 1)
struct kvm_tdx_capabilities {
__u64 attrs_fixed0;
__u64 attrs_fixed1;
__u64 xfam_fixed0;
__u64 xfam_fixed1;
__u32 supported_gpaw;
__u32 padding;
__u64 reserved[251];
__u32 nr_cpuid_configs;
struct kvm_tdx_cpuid_config cpuid_configs[];
};
struct kvm_tdx_init_vm {
__u64 attributes;
__u64 mrconfigid[6]; /* sha384 digest */
__u64 mrowner[6]; /* sha384 digest */
__u64 mrownerconfig[6]; /* sha384 digest */
/*
* For future extensibility to make sizeof(struct kvm_tdx_init_vm) = 8KB.
* This should be enough given sizeof(TD_PARAMS) = 1024.
* 8KB was chosen because
* sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES(=256) = 8KB.
*/
__u64 reserved[1004];
/*
* Call KVM_TDX_INIT_VM before vcpu creation, thus before
* KVM_SET_CPUID2.
* This configuration supersedes KVM_SET_CPUID2s for VCPUs because the
* TDX module directly virtualizes those CPUIDs without VMM. The user
* space VMM, e.g. qemu, should make KVM_SET_CPUID2 consistent with
* those values. If it doesn't, KVM may have wrong idea of vCPUIDs of
* the guest, and KVM may wrongly emulate CPUIDs or MSRs that the TDX
* module doesn't virtualize.
*/
struct kvm_cpuid2 cpuid;
};
#endif /* _ASM_X86_KVM_H */


@@ -447,6 +447,11 @@
#define __NR_futex_wake 454
#define __NR_futex_wait 455
#define __NR_futex_requeue 456
#define __NR_statmount 457
#define __NR_listmount 458
#define __NR_lsm_get_self_attr 459
#define __NR_lsm_set_self_attr 460
#define __NR_lsm_list_modules 461
#endif /* _ASM_UNISTD_32_H */


@@ -369,6 +369,11 @@
#define __NR_futex_wake 454
#define __NR_futex_wait 455
#define __NR_futex_requeue 456
#define __NR_statmount 457
#define __NR_listmount 458
#define __NR_lsm_get_self_attr 459
#define __NR_lsm_set_self_attr 460
#define __NR_lsm_list_modules 461
#endif /* _ASM_UNISTD_64_H */


@@ -321,6 +321,11 @@
#define __NR_futex_wake (__X32_SYSCALL_BIT + 454)
#define __NR_futex_wait (__X32_SYSCALL_BIT + 455)
#define __NR_futex_requeue (__X32_SYSCALL_BIT + 456)
#define __NR_statmount (__X32_SYSCALL_BIT + 457)
#define __NR_listmount (__X32_SYSCALL_BIT + 458)
#define __NR_lsm_get_self_attr (__X32_SYSCALL_BIT + 459)
#define __NR_lsm_set_self_attr (__X32_SYSCALL_BIT + 460)
#define __NR_lsm_list_modules (__X32_SYSCALL_BIT + 461)
#define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512)
#define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513)
#define __NR_ioctl (__X32_SYSCALL_BIT + 514)


@@ -49,6 +49,7 @@ enum {
IOMMUFD_CMD_GET_HW_INFO,
IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING,
IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP,
IOMMUFD_CMD_HWPT_INVALIDATE,
};
/**
@@ -613,4 +614,82 @@ struct iommu_hwpt_get_dirty_bitmap {
#define IOMMU_HWPT_GET_DIRTY_BITMAP _IO(IOMMUFD_TYPE, \
IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP)
/**
* enum iommu_hwpt_invalidate_data_type - IOMMU HWPT Cache Invalidation
* Data Type
* @IOMMU_HWPT_INVALIDATE_DATA_VTD_S1: Invalidation data for VTD_S1
*/
enum iommu_hwpt_invalidate_data_type {
IOMMU_HWPT_INVALIDATE_DATA_VTD_S1,
};
/**
* enum iommu_hwpt_vtd_s1_invalidate_flags - Flags for Intel VT-d
* stage-1 cache invalidation
* @IOMMU_VTD_INV_FLAGS_LEAF: Indicates whether the invalidation applies
* to all-levels page structure cache or just
* the leaf PTE cache.
*/
enum iommu_hwpt_vtd_s1_invalidate_flags {
IOMMU_VTD_INV_FLAGS_LEAF = 1 << 0,
};
/**
* struct iommu_hwpt_vtd_s1_invalidate - Intel VT-d cache invalidation
* (IOMMU_HWPT_INVALIDATE_DATA_VTD_S1)
* @addr: The start address of the range to be invalidated. It needs to
* be 4KB aligned.
* @npages: Number of contiguous 4K pages to be invalidated.
* @flags: Combination of enum iommu_hwpt_vtd_s1_invalidate_flags
* @__reserved: Must be 0
*
* The Intel VT-d specific invalidation data for user-managed stage-1 cache
* invalidation in nested translation. Userspace uses this structure to
* tell the impacted cache scope after modifying the stage-1 page table.
*
* To invalidate all the caches related to the page table, set @addr
* to 0 and @npages to U64_MAX.
*
* The device TLB will be invalidated automatically if ATS is enabled.
*/
struct iommu_hwpt_vtd_s1_invalidate {
__aligned_u64 addr;
__aligned_u64 npages;
__u32 flags;
__u32 __reserved;
};
/**
* struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE)
* @size: sizeof(struct iommu_hwpt_invalidate)
* @hwpt_id: ID of a nested HWPT for cache invalidation
* @data_uptr: User pointer to an array of driver-specific cache invalidation
* data.
* @data_type: One of enum iommu_hwpt_invalidate_data_type, defining the data
* type of all the entries in the invalidation request array. It
* should be a type supported by the hwpt pointed by @hwpt_id.
* @entry_len: Length (in bytes) of a request entry in the request array
* @entry_num: Input the number of cache invalidation requests in the array.
* Output the number of requests successfully handled by kernel.
* @__reserved: Must be 0.
*
* Invalidate the iommu cache for user-managed page table. Modifications on a
* user-managed page table should be followed by this operation to sync cache.
* Each ioctl can support one or more cache invalidation requests in the array
* that has a total size of @entry_len * @entry_num.
*
* An empty invalidation request array by setting @entry_num==0 is allowed, and
* @entry_len and @data_uptr would be ignored in this case. This can be used to
* check if the given @data_type is supported or not by kernel.
*/
struct iommu_hwpt_invalidate {
__u32 size;
__u32 hwpt_id;
__aligned_u64 data_uptr;
__u32 data_type;
__u32 entry_len;
__u32 entry_num;
__u32 __reserved;
};
#define IOMMU_HWPT_INVALIDATE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_INVALIDATE)
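A usage sketch, assuming the caller already holds an iommufd file descriptor and the ID of a nested VT-d stage-1 HWPT (both hypothetical here):

/* Sketch: invalidate npages of 4K starting at addr on a nested HWPT. */
static int vtd_s1_invalidate(int iommufd, uint32_t hwpt_id,
                             uint64_t addr, uint64_t npages)
{
    struct iommu_hwpt_vtd_s1_invalidate entry = {
        .addr = addr,            /* must be 4KB aligned */
        .npages = npages,
        .flags = 0,              /* or IOMMU_VTD_INV_FLAGS_LEAF */
    };
    struct iommu_hwpt_invalidate cmd = {
        .size = sizeof(cmd),
        .hwpt_id = hwpt_id,
        .data_uptr = (uint64_t)(unsigned long)&entry,
        .data_type = IOMMU_HWPT_INVALIDATE_DATA_VTD_S1,
        .entry_len = sizeof(entry),
        .entry_num = 1,
    };
    return ioctl(iommufd, IOMMU_HWPT_INVALIDATE, &cmd);
}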
#endif


@@ -16,76 +16,6 @@
#define KVM_API_VERSION 12
/* *** Deprecated interfaces *** */
#define KVM_TRC_SHIFT 16
#define KVM_TRC_ENTRYEXIT (1 << KVM_TRC_SHIFT)
#define KVM_TRC_HANDLER (1 << (KVM_TRC_SHIFT + 1))
#define KVM_TRC_VMENTRY (KVM_TRC_ENTRYEXIT + 0x01)
#define KVM_TRC_VMEXIT (KVM_TRC_ENTRYEXIT + 0x02)
#define KVM_TRC_PAGE_FAULT (KVM_TRC_HANDLER + 0x01)
#define KVM_TRC_HEAD_SIZE 12
#define KVM_TRC_CYCLE_SIZE 8
#define KVM_TRC_EXTRA_MAX 7
#define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02)
#define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03)
#define KVM_TRC_PEND_INTR (KVM_TRC_HANDLER + 0x04)
#define KVM_TRC_IO_READ (KVM_TRC_HANDLER + 0x05)
#define KVM_TRC_IO_WRITE (KVM_TRC_HANDLER + 0x06)
#define KVM_TRC_CR_READ (KVM_TRC_HANDLER + 0x07)
#define KVM_TRC_CR_WRITE (KVM_TRC_HANDLER + 0x08)
#define KVM_TRC_DR_READ (KVM_TRC_HANDLER + 0x09)
#define KVM_TRC_DR_WRITE (KVM_TRC_HANDLER + 0x0A)
#define KVM_TRC_MSR_READ (KVM_TRC_HANDLER + 0x0B)
#define KVM_TRC_MSR_WRITE (KVM_TRC_HANDLER + 0x0C)
#define KVM_TRC_CPUID (KVM_TRC_HANDLER + 0x0D)
#define KVM_TRC_INTR (KVM_TRC_HANDLER + 0x0E)
#define KVM_TRC_NMI (KVM_TRC_HANDLER + 0x0F)
#define KVM_TRC_VMMCALL (KVM_TRC_HANDLER + 0x10)
#define KVM_TRC_HLT (KVM_TRC_HANDLER + 0x11)
#define KVM_TRC_CLTS (KVM_TRC_HANDLER + 0x12)
#define KVM_TRC_LMSW (KVM_TRC_HANDLER + 0x13)
#define KVM_TRC_APIC_ACCESS (KVM_TRC_HANDLER + 0x14)
#define KVM_TRC_TDP_FAULT (KVM_TRC_HANDLER + 0x15)
#define KVM_TRC_GTLB_WRITE (KVM_TRC_HANDLER + 0x16)
#define KVM_TRC_STLB_WRITE (KVM_TRC_HANDLER + 0x17)
#define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18)
#define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19)
struct kvm_user_trace_setup {
__u32 buf_size;
__u32 buf_nr;
};
#define __KVM_DEPRECATED_MAIN_W_0x06 \
_IOW(KVMIO, 0x06, struct kvm_user_trace_setup)
#define __KVM_DEPRECATED_MAIN_0x07 _IO(KVMIO, 0x07)
#define __KVM_DEPRECATED_MAIN_0x08 _IO(KVMIO, 0x08)
#define __KVM_DEPRECATED_VM_R_0x70 _IOR(KVMIO, 0x70, struct kvm_assigned_irq)
struct kvm_breakpoint {
__u32 enabled;
__u32 padding;
__u64 address;
};
struct kvm_debug_guest {
__u32 enabled;
__u32 pad;
struct kvm_breakpoint breakpoints[4];
__u32 singlestep;
};
#define __KVM_DEPRECATED_VCPU_W_0x87 _IOW(KVMIO, 0x87, struct kvm_debug_guest)
/* *** End of deprecated interfaces *** */
/* for KVM_SET_USER_MEMORY_REGION */
struct kvm_userspace_memory_region {
__u32 slot;
@@ -95,6 +25,19 @@ struct kvm_userspace_memory_region {
__u64 userspace_addr; /* start of the userspace allocated memory */
};
/* for KVM_SET_USER_MEMORY_REGION2 */
struct kvm_userspace_memory_region2 {
__u32 slot;
__u32 flags;
__u64 guest_phys_addr;
__u64 memory_size;
__u64 userspace_addr;
__u64 guest_memfd_offset;
__u32 guest_memfd;
__u32 pad1;
__u64 pad2[14];
};
/*
* The bit 0 ~ bit 15 of kvm_userspace_memory_region::flags are visible for
* userspace, other bits are reserved for kvm internal use which are defined
@@ -102,6 +45,7 @@ struct kvm_userspace_memory_region {
*/
#define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0)
#define KVM_MEM_READONLY (1UL << 1)
#define KVM_MEM_GUEST_MEMFD (1UL << 2)
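A registration sketch tying the new flag, struct and ioctl together (the fd and address values are assumed to exist; KVM_SET_USER_MEMORY_REGION2 is defined further down in this same header):

/* Sketch: back the private half of a slot with a guest_memfd. */
static int set_region2(int vm_fd, __u32 slot, __u64 gpa, __u64 size,
                       void *hva, int gmem_fd)
{
    struct kvm_userspace_memory_region2 region = {
        .slot = slot,
        .flags = KVM_MEM_GUEST_MEMFD,
        .guest_phys_addr = gpa,
        .memory_size = size,
        .userspace_addr = (__u64)(unsigned long)hva,
        .guest_memfd = gmem_fd,
        .guest_memfd_offset = 0,
    };
    return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION2, &region);
}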
/* for KVM_IRQ_LINE */
struct kvm_irq_level {
@@ -223,6 +167,92 @@ struct kvm_xen_exit {
} u;
};
/* masks for reg_mask to indicate which registers are passed. */
#define TDX_VMCALL_REG_MASK_RBX BIT_ULL(2)
#define TDX_VMCALL_REG_MASK_RDX BIT_ULL(3)
#define TDX_VMCALL_REG_MASK_RSI BIT_ULL(6)
#define TDX_VMCALL_REG_MASK_RDI BIT_ULL(7)
#define TDX_VMCALL_REG_MASK_R8 BIT_ULL(8)
#define TDX_VMCALL_REG_MASK_R9 BIT_ULL(9)
#define TDX_VMCALL_REG_MASK_R10 BIT_ULL(10)
#define TDX_VMCALL_REG_MASK_R11 BIT_ULL(11)
#define TDX_VMCALL_REG_MASK_R12 BIT_ULL(12)
#define TDX_VMCALL_REG_MASK_R13 BIT_ULL(13)
#define TDX_VMCALL_REG_MASK_R14 BIT_ULL(14)
#define TDX_VMCALL_REG_MASK_R15 BIT_ULL(15)
struct kvm_tdx_exit {
#define KVM_EXIT_TDX_VMCALL 1
__u32 type;
__u32 pad;
union {
struct kvm_tdx_vmcall {
/*
* RAX(bit 0), RCX(bit 1) and RSP(bit 4) are reserved.
* RAX(bit 0): TDG.VP.VMCALL status code.
* RCX(bit 1): bitmap for used registers.
* RSP(bit 4): the caller stack.
*/
union {
__u64 in_rcx;
__u64 reg_mask;
};
/*
* Guest-Host-Communication Interface for TDX spec
* defines the ABI for TDG.VP.VMCALL.
*/
/* Input parameters: guest -> VMM */
union {
__u64 in_r10;
__u64 type;
};
union {
__u64 in_r11;
__u64 subfunction;
};
/*
* Subfunction specific.
* Registers are used in this order to pass input
* arguments. r12=arg0, r13=arg1, etc.
*/
__u64 in_r12;
__u64 in_r13;
__u64 in_r14;
__u64 in_r15;
__u64 in_rbx;
__u64 in_rdi;
__u64 in_rsi;
__u64 in_r8;
__u64 in_r9;
__u64 in_rdx;
/* Output parameters: VMM -> guest */
union {
__u64 out_r10;
__u64 status_code;
};
/*
* Subfunction specific.
* Registers are used in this order to output return
* values. r11=ret0, r12=ret1, etc.
*/
__u64 out_r11;
__u64 out_r12;
__u64 out_r13;
__u64 out_r14;
__u64 out_r15;
__u64 out_rbx;
__u64 out_rdi;
__u64 out_rsi;
__u64 out_r8;
__u64 out_r9;
__u64 out_rdx;
} vmcall;
} u;
};
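A sketch of the userspace side of such an exit; only the dispatch shape follows from the structures above, the handlers and the status value written back are placeholders:

/* Sketch: route a TDG.VP.VMCALL exit; real code would dispatch on
 * vmcall->type/subfunction per the GHCI spec. */
static void handle_tdx_exit(struct kvm_run *run)
{
    struct kvm_tdx_vmcall *vmcall;

    if (run->exit_reason != KVM_EXIT_TDX ||
        run->tdx.type != KVM_EXIT_TDX_VMCALL) {
        return;
    }
    vmcall = &run->tdx.u.vmcall;
    switch (vmcall->type) {     /* in_r10 carries the GHCI call type */
    default:
        vmcall->status_code = (__u64)-1; /* placeholder "unsupported" code */
        break;
    }
}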
#define KVM_S390_GET_SKEYS_NONE 1
#define KVM_S390_SKEYS_MAX 1048576
@@ -265,6 +295,8 @@ struct kvm_xen_exit {
#define KVM_EXIT_RISCV_CSR 36
#define KVM_EXIT_NOTIFY 37
#define KVM_EXIT_LOONGARCH_IOCSR 38
#define KVM_EXIT_MEMORY_FAULT 39
#define KVM_EXIT_TDX 40
/* For KVM_EXIT_INTERNAL_ERROR */
/* Emulate instruction failed. */
@@ -514,6 +546,15 @@ struct kvm_run {
#define KVM_NOTIFY_CONTEXT_INVALID (1 << 0)
__u32 flags;
} notify;
/* KVM_EXIT_MEMORY_FAULT */
struct {
#define KVM_MEMORY_EXIT_FLAG_PRIVATE (1ULL << 3)
__u64 flags;
__u64 gpa;
__u64 size;
} memory_fault;
/* KVM_EXIT_TDX_VMCALL */
struct kvm_tdx_exit tdx;
/* Fix the size of the union. */
char padding[256];
};
@@ -941,9 +982,6 @@ struct kvm_ppc_resize_hpt {
*/
#define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* in bytes */
#define KVM_GET_SUPPORTED_CPUID _IOWR(KVMIO, 0x05, struct kvm_cpuid2)
#define KVM_TRACE_ENABLE __KVM_DEPRECATED_MAIN_W_0x06
#define KVM_TRACE_PAUSE __KVM_DEPRECATED_MAIN_0x07
#define KVM_TRACE_DISABLE __KVM_DEPRECATED_MAIN_0x08
#define KVM_GET_EMULATED_CPUID _IOWR(KVMIO, 0x09, struct kvm_cpuid2)
#define KVM_GET_MSR_FEATURE_INDEX_LIST _IOWR(KVMIO, 0x0a, struct kvm_msr_list)
@@ -1197,6 +1235,13 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE 228
#define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229
#define KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES 230
#define KVM_CAP_USER_MEMORY2 231
#define KVM_CAP_MEMORY_FAULT_INFO 232
#define KVM_CAP_MEMORY_ATTRIBUTES 233
#define KVM_CAP_GUEST_MEMFD 234
#define KVM_CAP_VM_TYPES 235
#define KVM_CAP_MEMORY_MAPPING 236
#define KVM_CAP_X86_BUS_FREQUENCY_CONTROL 237
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1287,6 +1332,7 @@ struct kvm_x86_mce {
#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4)
#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5)
#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6)
#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7)
struct kvm_xen_hvm_config {
__u32 flags;
@@ -1479,6 +1525,8 @@ struct kvm_vfio_spapr_tce {
struct kvm_userspace_memory_region)
#define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47)
#define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64)
#define KVM_SET_USER_MEMORY_REGION2 _IOW(KVMIO, 0x49, \
struct kvm_userspace_memory_region2)
/* enable ucontrol for s390 */
struct kvm_s390_ucas_mapping {
@@ -1503,20 +1551,8 @@ struct kvm_s390_ucas_mapping {
_IOW(KVMIO, 0x67, struct kvm_coalesced_mmio_zone)
#define KVM_UNREGISTER_COALESCED_MMIO \
_IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone)
#define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \
struct kvm_assigned_pci_dev)
#define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing)
/* deprecated, replaced by KVM_ASSIGN_DEV_IRQ */
#define KVM_ASSIGN_IRQ __KVM_DEPRECATED_VM_R_0x70
#define KVM_ASSIGN_DEV_IRQ _IOW(KVMIO, 0x70, struct kvm_assigned_irq)
#define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71)
#define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \
struct kvm_assigned_pci_dev)
#define KVM_ASSIGN_SET_MSIX_NR _IOW(KVMIO, 0x73, \
struct kvm_assigned_msix_nr)
#define KVM_ASSIGN_SET_MSIX_ENTRY _IOW(KVMIO, 0x74, \
struct kvm_assigned_msix_entry)
#define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq)
#define KVM_IRQFD _IOW(KVMIO, 0x76, struct kvm_irqfd)
#define KVM_CREATE_PIT2 _IOW(KVMIO, 0x77, struct kvm_pit_config)
#define KVM_SET_BOOT_CPU_ID _IO(KVMIO, 0x78)
@@ -1533,9 +1569,6 @@ struct kvm_s390_ucas_mapping {
* KVM_CAP_VM_TSC_CONTROL to set defaults for a VM */
#define KVM_SET_TSC_KHZ _IO(KVMIO, 0xa2)
#define KVM_GET_TSC_KHZ _IO(KVMIO, 0xa3)
/* Available with KVM_CAP_PCI_2_3 */
#define KVM_ASSIGN_SET_INTX_MASK _IOW(KVMIO, 0xa4, \
struct kvm_assigned_pci_dev)
/* Available with KVM_CAP_SIGNAL_MSI */
#define KVM_SIGNAL_MSI _IOW(KVMIO, 0xa5, struct kvm_msi)
/* Available with KVM_CAP_PPC_GET_SMMU_INFO */
@@ -1588,8 +1621,6 @@ struct kvm_s390_ucas_mapping {
#define KVM_SET_SREGS _IOW(KVMIO, 0x84, struct kvm_sregs)
#define KVM_TRANSLATE _IOWR(KVMIO, 0x85, struct kvm_translation)
#define KVM_INTERRUPT _IOW(KVMIO, 0x86, struct kvm_interrupt)
/* KVM_DEBUG_GUEST is no longer supported, use KVM_SET_GUEST_DEBUG instead */
#define KVM_DEBUG_GUEST __KVM_DEPRECATED_VCPU_W_0x87
#define KVM_GET_MSRS _IOWR(KVMIO, 0x88, struct kvm_msrs)
#define KVM_SET_MSRS _IOW(KVMIO, 0x89, struct kvm_msrs)
#define KVM_SET_CPUID _IOW(KVMIO, 0x8a, struct kvm_cpuid)
@@ -2263,4 +2294,33 @@ struct kvm_s390_zpci_op {
/* flags for kvm_s390_zpci_op->u.reg_aen.flags */
#define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0)
/* Available with KVM_CAP_MEMORY_ATTRIBUTES */
#define KVM_SET_MEMORY_ATTRIBUTES _IOW(KVMIO, 0xd2, struct kvm_memory_attributes)
struct kvm_memory_attributes {
__u64 address;
__u64 size;
__u64 attributes;
__u64 flags;
};
#define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3)
#define KVM_CREATE_GUEST_MEMFD _IOWR(KVMIO, 0xd4, struct kvm_create_guest_memfd)
struct kvm_create_guest_memfd {
__u64 size;
__u64 flags;
__u64 reserved[6];
};
#define KVM_MEMORY_MAPPING _IOWR(KVMIO, 0xd5, struct kvm_memory_mapping)
struct kvm_memory_mapping {
__u64 base_gfn;
__u64 nr_pages;
__u64 flags;
__u64 source;
};
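A sketch combining the two new ioctls: create a guest_memfd sized for the region, then mark the range private (vm_fd/gpa/size are assumed to exist; error handling is minimal):

/* Sketch: allocate private backing and flip the range's attribute. */
static int guest_memfd_make_private(int vm_fd, __u64 gpa, __u64 size)
{
    struct kvm_create_guest_memfd gmem = { .size = size };
    struct kvm_memory_attributes attrs = {
        .address = gpa,
        .size = size,
        .attributes = KVM_MEMORY_ATTRIBUTE_PRIVATE,
    };
    int gmem_fd = ioctl(vm_fd, KVM_CREATE_GUEST_MEMFD, &gmem);

    if (gmem_fd < 0) {
        return gmem_fd;
    }
    /* the fd would be handed to KVM_SET_USER_MEMORY_REGION2 here */
    return ioctl(vm_fd, KVM_SET_MEMORY_ATTRIBUTES, &attrs) ? -1 : gmem_fd;
}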
#endif /* __LINUX_KVM_H */


@@ -41,7 +41,8 @@
UFFD_FEATURE_WP_HUGETLBFS_SHMEM | \
UFFD_FEATURE_WP_UNPOPULATED | \
UFFD_FEATURE_POISON | \
-UFFD_FEATURE_WP_ASYNC)
+UFFD_FEATURE_WP_ASYNC | \
+UFFD_FEATURE_MOVE)
#define UFFD_API_IOCTLS \
((__u64)1 << _UFFDIO_REGISTER | \
(__u64)1 << _UFFDIO_UNREGISTER | \
@@ -50,6 +51,7 @@
((__u64)1 << _UFFDIO_WAKE | \
(__u64)1 << _UFFDIO_COPY | \
(__u64)1 << _UFFDIO_ZEROPAGE | \
(__u64)1 << _UFFDIO_MOVE | \
(__u64)1 << _UFFDIO_WRITEPROTECT | \
(__u64)1 << _UFFDIO_CONTINUE | \
(__u64)1 << _UFFDIO_POISON)
@@ -73,6 +75,7 @@
#define _UFFDIO_WAKE (0x02)
#define _UFFDIO_COPY (0x03)
#define _UFFDIO_ZEROPAGE (0x04)
#define _UFFDIO_MOVE (0x05)
#define _UFFDIO_WRITEPROTECT (0x06)
#define _UFFDIO_CONTINUE (0x07)
#define _UFFDIO_POISON (0x08)
@@ -92,6 +95,8 @@
struct uffdio_copy)
#define UFFDIO_ZEROPAGE _IOWR(UFFDIO, _UFFDIO_ZEROPAGE, \
struct uffdio_zeropage)
#define UFFDIO_MOVE _IOWR(UFFDIO, _UFFDIO_MOVE, \
struct uffdio_move)
#define UFFDIO_WRITEPROTECT _IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \
struct uffdio_writeprotect)
#define UFFDIO_CONTINUE _IOWR(UFFDIO, _UFFDIO_CONTINUE, \
@@ -222,6 +227,9 @@ struct uffdio_api {
* asynchronous mode is supported in which the write fault is
* automatically resolved and write-protection is un-set.
* It implies UFFD_FEATURE_WP_UNPOPULATED.
*
* UFFD_FEATURE_MOVE indicates that the kernel supports moving an
* existing page contents from userspace.
*/
#define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0)
#define UFFD_FEATURE_EVENT_FORK (1<<1)
@@ -239,6 +247,7 @@ struct uffdio_api {
#define UFFD_FEATURE_WP_UNPOPULATED (1<<13)
#define UFFD_FEATURE_POISON (1<<14)
#define UFFD_FEATURE_WP_ASYNC (1<<15)
#define UFFD_FEATURE_MOVE (1<<16)
__u64 features;
__u64 ioctls;
@@ -347,6 +356,24 @@ struct uffdio_poison {
__s64 updated;
};
struct uffdio_move {
__u64 dst;
__u64 src;
__u64 len;
/*
* Especially if used to atomically remove memory from the
* address space the wake on the dst range is not needed.
*/
#define UFFDIO_MOVE_MODE_DONTWAKE ((__u64)1<<0)
#define UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES ((__u64)1<<1)
__u64 mode;
/*
* "move" is written by the ioctl and must be at the end: the
* copy_from_user will not read the last 8 bytes.
*/
__s64 move;
};
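A minimal usage sketch (uffd, the addresses and the registration are assumed to be set up already, with UFFD_FEATURE_MOVE negotiated through UFFDIO_API):

/* Sketch: move len bytes of existing pages from src to dst. */
static long uffd_move(int uffd, unsigned long dst, unsigned long src,
                      unsigned long len)
{
    struct uffdio_move move = {
        .dst = dst,
        .src = src,
        .len = len,
        .mode = 0, /* or UFFDIO_MOVE_MODE_DONTWAKE */
    };

    if (ioctl(uffd, UFFDIO_MOVE, &move) == -1) {
        return -1;
    }
    return move.move; /* byte count written back by the kernel */
}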
/*
* Flags for the userfaultfd(2) system call itself.
*/


@@ -1219,6 +1219,7 @@ enum vfio_device_mig_state {
VFIO_DEVICE_STATE_RUNNING_P2P = 5,
VFIO_DEVICE_STATE_PRE_COPY = 6,
VFIO_DEVICE_STATE_PRE_COPY_P2P = 7,
VFIO_DEVICE_STATE_NR,
};
/**


@@ -895,6 +895,42 @@
'reduced-phys-bits': 'uint32',
'*kernel-hashes': 'bool' } }
##
# @TdxGuestProperties:
#
# Properties for tdx-guest objects.
#
# @sept-ve-disable: toggle bit 28 of TD attributes to control disabling
# of EPT violation conversion to #VE on guest TD access of PENDING
# pages. Some guest OSes (e.g., Linux TD guests) require this to be
# set, otherwise they refuse to boot.
#
# @mrconfigid: ID for non-owner-defined configuration of the guest TD,
# e.g., run-time or OS configuration (base64 encoded SHA384 digest).
# (A default value 0 of SHA384 is used when absent).
#
# @mrowner: ID for the guest TDs owner (base64 encoded SHA384 digest).
# (A default value 0 of SHA384 is used when absent).
#
# @mrownerconfig: ID for owner-defined configuration of the guest TD,
# e.g., specific to the workload rather than the run-time or OS
# (base64 encoded SHA384 digest). (A default value 0 of SHA384 is
# used when absent).
#
# @quote-generation-socket: socket address for Quote Generation
# Service (QGS). QGS is a daemon running on the host. User in
# TD guest cannot get TD quoting for attestation if QGS is not
# provided. So admin should always provide it.
#
# Since: 9.0
##
{ 'struct': 'TdxGuestProperties',
'data': { '*sept-ve-disable': 'bool',
'*mrconfigid': 'str',
'*mrowner': 'str',
'*mrownerconfig': 'str',
'*quote-generation-socket': 'SocketAddress' } }
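As a hypothetical usage sketch (the object id and property values below are illustrative, not mandated by the schema), a TD might be configured with

-object tdx-guest,id=tdx0,sept-ve-disable=on \
-machine q35,confidential-guest-support=tdx0

with @quote-generation-socket, being a SocketAddress, most naturally supplied through the JSON form of -object.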
##
# @ThreadContextProperties:
#
@@ -974,6 +1010,7 @@
'sev-guest',
'thread-context',
's390-pv-guest',
'tdx-guest',
'throttle-group',
'tls-creds-anon',
'tls-creds-psk',
@@ -1041,6 +1078,7 @@
'secret_keyring': { 'type': 'SecretKeyringProperties',
'if': 'CONFIG_SECRET_KEYRING' },
'sev-guest': 'SevGuestProperties',
'tdx-guest': 'TdxGuestProperties',
'thread-context': 'ThreadContextProperties',
'throttle-group': 'ThrottleGroupProperties',
'tls-creds-anon': 'TlsCredsAnonProperties',


@@ -483,10 +483,12 @@
#
# @s390: s390 guest panic information type (Since: 2.12)
#
# @tdx: tdx guest panic information type (Since: 9.0)
#
# Since: 2.9
##
{ 'enum': 'GuestPanicInformationType',
'data': [ 'hyper-v', 's390' ] }
'data': [ 'hyper-v', 's390', 'tdx' ] }
##
# @GuestPanicInformation:
@@ -501,7 +503,8 @@
'base': {'type': 'GuestPanicInformationType'},
'discriminator': 'type',
'data': {'hyper-v': 'GuestPanicInformationHyperV',
's390': 'GuestPanicInformationS390'}}
's390': 'GuestPanicInformationS390',
'tdx' : 'GuestPanicInformationTdx'}}
##
# @GuestPanicInformationHyperV:
@@ -564,6 +567,30 @@
'psw-addr': 'uint64',
'reason': 'S390CrashReason'}}
##
# @GuestPanicInformationTdx:
#
# TDX guest panic information, as specified in the
# "Guest-Hypervisor Communication Interface (GHCI) Specification",
# section TDG.VP.VMCALL<ReportFatalError>.
#
# @error-code: TD-specific error code
#
# @message: Human-readable error message provided by the guest. Not
# to be trusted.
#
# @gpa: guest-physical address of a page that contains more verbose
# error information, as a zero-terminated string. Present when the
# "GPA valid" bit (bit 63) is set in @error-code.
#
# Since: 9.0
##
{'struct': 'GuestPanicInformationTdx',
'data': {'error-code': 'uint64',
'message': 'str',
'*gpa': 'uint64'}}
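(For orientation: the consumer of this structure appears later in this series in qemu_system_guest_panicked(), which logs @error-code and @message unconditionally and reports @gpa only after testing the "GPA valid" bit, i.e. info->u.tdx.error_code & (1ull << 63).)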
##
# @MEMORY_FAILURE:
#


@@ -1850,6 +1850,11 @@ bool memory_region_is_protected(MemoryRegion *mr)
return mr->ram && (mr->ram_block->flags & RAM_PROTECTED);
}
bool memory_region_has_guest_memfd(MemoryRegion *mr)
{
return mr->ram_block && mr->ram_block->guest_memfd >= 0;
}
uint8_t memory_region_get_dirty_log_mask(MemoryRegion *mr)
{
uint8_t mask = mr->dirty_log_mask;
@@ -3601,6 +3606,31 @@ bool memory_region_init_ram(MemoryRegion *mr,
return true;
}
bool memory_region_init_ram_guest_memfd(MemoryRegion *mr,
Object *owner,
const char *name,
uint64_t size,
Error **errp)
{
DeviceState *owner_dev;
if (!memory_region_init_ram_flags_nomigrate(mr, owner, name, size,
RAM_GUEST_MEMFD, errp)) {
return false;
}
/* This will assert if owner is neither NULL nor a DeviceState.
* We only want the owner here for the purposes of defining a
* unique name for migration. TODO: Ideally we should implement
* a naming scheme for Objects which are not DeviceStates, in
* which case we can relax this restriction.
*/
owner_dev = DEVICE(owner);
vmstate_register_ram(mr, owner_dev);
return true;
}
bool memory_region_init_rom(MemoryRegion *mr,
Object *owner,
const char *name,


@@ -1841,6 +1841,17 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
}
}
if (kvm_enabled() && (new_block->flags & RAM_GUEST_MEMFD)) {
assert(new_block->guest_memfd < 0);
new_block->guest_memfd = kvm_create_guest_memfd(new_block->max_length,
0, errp);
if (new_block->guest_memfd < 0) {
qemu_mutex_unlock_ramlist();
return;
}
}
new_ram_size = MAX(old_ram_size,
(new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
if (new_ram_size > old_ram_size) {
@@ -1903,7 +1914,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
/* Just support these ram flags by now. */
assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE |
RAM_PROTECTED | RAM_NAMED_FILE | RAM_READONLY |
RAM_READONLY_FD)) == 0);
RAM_READONLY_FD | RAM_GUEST_MEMFD)) == 0);
if (xen_enabled()) {
error_setg(errp, "-mem-path not supported with Xen");
@@ -1938,6 +1949,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
new_block->used_length = size;
new_block->max_length = size;
new_block->flags = ram_flags;
new_block->guest_memfd = -1;
new_block->host = file_ram_alloc(new_block, size, fd, !file_size, offset,
errp);
if (!new_block->host) {
@@ -2016,7 +2028,7 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
Error *local_err = NULL;
assert((ram_flags & ~(RAM_SHARED | RAM_RESIZEABLE | RAM_PREALLOC |
RAM_NORESERVE)) == 0);
RAM_NORESERVE | RAM_GUEST_MEMFD)) == 0);
assert(!host ^ (ram_flags & RAM_PREALLOC));
size = HOST_PAGE_ALIGN(size);
@@ -2028,6 +2040,7 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
new_block->max_length = max_size;
assert(max_size >= size);
new_block->fd = -1;
new_block->guest_memfd = -1;
new_block->page_size = qemu_real_host_page_size();
new_block->host = host;
new_block->flags = ram_flags;
@@ -2050,7 +2063,7 @@ RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags,
MemoryRegion *mr, Error **errp)
{
assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE)) == 0);
assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE | RAM_GUEST_MEMFD)) == 0);
return qemu_ram_alloc_internal(size, size, NULL, NULL, ram_flags, mr, errp);
}
@@ -2078,6 +2091,11 @@ static void reclaim_ramblock(RAMBlock *block)
} else {
qemu_anon_ram_free(block->host, block->max_length);
}
if (block->guest_memfd >= 0) {
close(block->guest_memfd);
}
g_free(block);
}
@@ -3600,6 +3618,29 @@ err:
return ret;
}
int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start,
size_t length)
{
int ret = -1;
#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
ret = fallocate(rb->guest_memfd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
start, length);
if (ret) {
ret = -errno;
error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx (%d)",
__func__, rb->idstr, start, length, ret);
}
#else
ret = -ENOSYS;
error_report("%s: fallocate not available %s:%" PRIx64 " +%zx (%d)",
__func__, rb->idstr, start, length, ret);
#endif
return ret;
}
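(Punching a hole with FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE releases the backing pages of the given guest_memfd range without shrinking the file; plausibly this is how private memory gets freed when a range is converted to shared, though the callers are outside this hunk.)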
bool ramblock_is_pmem(RAMBlock *rb)
{
return rb->flags & RAM_PMEM;


@@ -519,6 +519,52 @@ static void qemu_system_wakeup(void)
}
}
static char* tdx_parse_panic_message(char *message)
{
bool printable = false;
char *buf = NULL;
int len = 0, i;
/*
* Although message is defined as a json string, we shouldn't
* unconditionally treat it as is because the guest generated it and
* it's not necessarily trustworthy.
*/
if (message) {
/* The caller guarantees the NUL-terminated string. */
len = strlen(message);
printable = len > 0;
for (i = 0; i < len; i++) {
if (!(0x20 <= message[i] && message[i] <= 0x7e)) {
printable = false;
break;
}
}
}
if (!printable && len) {
/* 3 = length of "%02x " */
buf = g_malloc(len * 3);
for (i = 0; i < len; i++) {
if (message[i] == '\0') {
break;
} else {
sprintf(buf + 3 * i, "%02x ", message[i]);
}
}
if (i > 0)
/* replace the last ' '(space) to NUL */
buf[i * 3 - 1] = '\0';
else
buf[0] = '\0';
return buf;
}
return message;
}
void qemu_system_guest_panicked(GuestPanicInformation *info)
{
qemu_log_mask(LOG_GUEST_ERROR, "Guest crashed");
@@ -560,7 +606,19 @@ void qemu_system_guest_panicked(GuestPanicInformation *info)
S390CrashReason_str(info->u.s390.reason),
info->u.s390.psw_mask,
info->u.s390.psw_addr);
} else if (info->type == GUEST_PANIC_INFORMATION_TYPE_TDX) {
qemu_log_mask(LOG_GUEST_ERROR,
" TDX guest reports fatal error:"
" error code: 0x%#" PRIx64 " error message:\"%s\"\n",
info->u.tdx.error_code,
tdx_parse_panic_message(info->u.tdx.message));
if (info->u.tdx.error_code & (1ull << 63)) {
qemu_log_mask(LOG_GUEST_ERROR, "Additional error information "
"can be found at gpa page: 0x%#" PRIx64 "\n",
info->u.tdx.gpa);
}
}
qapi_free_GuestPanicInformation(info);
}
}


@@ -20,6 +20,15 @@
#ifndef I386_CPU_INTERNAL_H
#define I386_CPU_INTERNAL_H
typedef struct FeatureMask {
FeatureWord index;
uint64_t mask;
} FeatureMask;
typedef struct FeatureDep {
FeatureMask from, to;
} FeatureDep;
typedef enum FeatureWordType {
CPUID_FEATURE_WORD,
MSR_FEATURE_WORD,


@@ -1443,15 +1443,6 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
},
};
typedef struct FeatureMask {
FeatureWord index;
uint64_t mask;
} FeatureMask;
typedef struct FeatureDep {
FeatureMask from, to;
} FeatureDep;
static FeatureDep feature_dependencies[] = {
{
.from = { FEAT_7_0_EDX, CPUID_7_0_EDX_ARCH_CAPABILITIES },
@@ -1576,9 +1567,6 @@ static const X86RegisterInfo32 x86_reg_info_32[CPU_NB_REGS32] = {
};
#undef REGISTER
/* CPUID feature bits available in XSS */
#define CPUID_XSTATE_XSS_MASK (XSTATE_ARCH_LBR_MASK)
ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT] = {
[XSTATE_FP_BIT] = {
/* x87 FP state component is always enabled if XSAVE is supported */


@@ -595,6 +595,9 @@ typedef enum X86Seg {
XSTATE_Hi16_ZMM_MASK | XSTATE_PKRU_MASK | \
XSTATE_XTILE_CFG_MASK | XSTATE_XTILE_DATA_MASK)
/* CPUID feature bits available in XSS */
#define CPUID_XSTATE_XSS_MASK (XSTATE_ARCH_LBR_MASK)
/* CPUID feature words */
typedef enum FeatureWord {
FEAT_1_EDX, /* CPUID[1].EDX */
@@ -787,6 +790,8 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
/* Support RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE */
#define CPUID_7_0_EBX_FSGSBASE (1U << 0)
/* Support for TSC adjustment MSR 0x3B */
#define CPUID_7_0_EBX_TSC_ADJUST (1U << 1)
/* Support SGX */
#define CPUID_7_0_EBX_SGX (1U << 2)
/* 1st Group of Advanced Bit Manipulation Extensions */
@@ -805,8 +810,12 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
#define CPUID_7_0_EBX_INVPCID (1U << 10)
/* Restricted Transactional Memory */
#define CPUID_7_0_EBX_RTM (1U << 11)
/* Cache QoS Monitoring */
#define CPUID_7_0_EBX_PQM (1U << 12)
/* Memory Protection Extension */
#define CPUID_7_0_EBX_MPX (1U << 14)
/* Resource Director Technology Allocation */
#define CPUID_7_0_EBX_RDT_A (1U << 15)
/* AVX-512 Foundation */
#define CPUID_7_0_EBX_AVX512F (1U << 16)
/* AVX-512 Doubleword & Quadword Instruction */
@@ -862,12 +871,20 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
#define CPUID_7_0_ECX_AVX512VNNI (1U << 11)
/* Support for VPOPCNT[B,W] and VPSHUFBITQMB */
#define CPUID_7_0_ECX_AVX512BITALG (1U << 12)
/* Intel Total Memory Encryption */
#define CPUID_7_0_ECX_TME (1U << 13)
/* POPCNT for vectors of DW/QW */
#define CPUID_7_0_ECX_AVX512_VPOPCNTDQ (1U << 14)
/* Placeholder for bit 15 */
#define CPUID_7_0_ECX_FZM (1U << 15)
/* 5-level Page Tables */
#define CPUID_7_0_ECX_LA57 (1U << 16)
/* MAWAU for MPX */
#define CPUID_7_0_ECX_MAWAU (31U << 17)
/* Read Processor ID */
#define CPUID_7_0_ECX_RDPID (1U << 22)
/* KeyLocker */
#define CPUID_7_0_ECX_KeyLocker (1U << 23)
/* Bus Lock Debug Exception */
#define CPUID_7_0_ECX_BUS_LOCK_DETECT (1U << 24)
/* Cache Line Demote Instruction */
@@ -876,6 +893,8 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
#define CPUID_7_0_ECX_MOVDIRI (1U << 27)
/* Move 64 Bytes as Direct Store Instruction */
#define CPUID_7_0_ECX_MOVDIR64B (1U << 28)
/* ENQCMD and ENQCMDS instructions */
#define CPUID_7_0_ECX_ENQCMD (1U << 29)
/* Support SGX Launch Control */
#define CPUID_7_0_ECX_SGX_LC (1U << 30)
/* Protection Keys for Supervisor-mode Pages */
@@ -893,6 +912,8 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
#define CPUID_7_0_EDX_SERIALIZE (1U << 14)
/* TSX Suspend Load Address Tracking instruction */
#define CPUID_7_0_EDX_TSX_LDTRK (1U << 16)
/* PCONFIG instruction */
#define CPUID_7_0_EDX_PCONFIG (1U << 18)
/* Architectural LBRs */
#define CPUID_7_0_EDX_ARCH_LBR (1U << 19)
/* AMX_BF16 instruction */


@@ -15,6 +15,7 @@
#include "sysemu/sysemu.h" #include "sysemu/sysemu.h"
#include "hw/boards.h" #include "hw/boards.h"
#include "tdx.h"
#include "kvm_i386.h" #include "kvm_i386.h"
#include "hw/core/accel-cpu.h" #include "hw/core/accel-cpu.h"
@@ -60,6 +61,10 @@ static bool lmce_supported(void)
if (kvm_ioctl(kvm_state, KVM_X86_GET_MCE_CAP_SUPPORTED, &mce_cap) < 0) {
return false;
}
if (is_tdx_vm())
return false;
return !!(mce_cap & MCG_LMCE_P);
}


@@ -32,6 +32,7 @@
#include "sysemu/runstate.h" #include "sysemu/runstate.h"
#include "kvm_i386.h" #include "kvm_i386.h"
#include "sev.h" #include "sev.h"
#include "tdx.h"
#include "xen-emu.h" #include "xen-emu.h"
#include "hyperv.h" #include "hyperv.h"
#include "hyperv-proto.h" #include "hyperv-proto.h"
@@ -161,6 +162,35 @@ static KVMMSRHandlers msr_handlers[KVM_MSR_FILTER_MAX_RANGES];
static RateLimit bus_lock_ratelimit_ctrl;
static int kvm_get_one_msr(X86CPU *cpu, int index, uint64_t *value);
static const char *vm_type_name[] = {
[KVM_X86_DEFAULT_VM] = "default",
[KVM_X86_TDX_VM] = "tdx",
};
int kvm_get_vm_type(MachineState *ms, const char *vm_type)
{
int kvm_type = KVM_X86_DEFAULT_VM;
if (ms->cgs && object_dynamic_cast(OBJECT(ms->cgs), TYPE_TDX_GUEST)) {
kvm_type = KVM_X86_TDX_VM;
}
/*
* old KVM doesn't support KVM_CAP_VM_TYPES and KVM_X86_DEFAULT_VM
* is always supported
*/
if (kvm_type == KVM_X86_DEFAULT_VM) {
return kvm_type;
}
if (!(kvm_check_extension(KVM_STATE(ms->accelerator), KVM_CAP_VM_TYPES) & BIT(kvm_type))) {
error_report("vm-type %s not supported by KVM", vm_type_name[kvm_type]);
exit(1);
}
return kvm_type;
}
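(For context: the type computed here is ultimately what QEMU passes as the type argument of the KVM_CREATE_VM ioctl, roughly kvm_ioctl(s, KVM_CREATE_VM, type); that call site is not part of this hunk.)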
bool kvm_has_smm(void)
{
return kvm_vm_check_extension(kvm_state, KVM_CAP_X86_SMM);
@@ -247,7 +277,7 @@ void kvm_synchronize_all_tsc(void)
{
CPUState *cpu;
if (kvm_enabled()) {
if (kvm_enabled() && !is_tdx_vm()) {
CPU_FOREACH(cpu) {
run_on_cpu(cpu, do_kvm_synchronize_tsc, RUN_ON_CPU_NULL);
}
@@ -490,6 +520,10 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
ret |= 1U << KVM_HINTS_REALTIME;
}
if (is_tdx_vm()) {
tdx_get_supported_cpuid(function, index, reg, &ret);
}
return ret;
}
@@ -759,6 +793,15 @@ static int kvm_arch_set_tsc_khz(CPUState *cs)
int r, cur_freq;
bool set_ioctl = false;
/*
* The TSC of a TD vcpu is immutable: it cannot be set/changed via the
* vcpu-scope VM_SET_TSC_KHZ, but can only be initialized via the VM-scope
* VM_SET_TSC_KHZ before ioctl KVM_TDX_INIT_VM in tdx_pre_create_vcpu()
*/
if (is_tdx_vm()) {
return 0;
}
if (!env->tsc_khz) {
return 0;
}
@@ -1655,8 +1698,6 @@ static int hyperv_init_vcpu(X86CPU *cpu)
static Error *invtsc_mig_blocker;
#define KVM_MAX_CPUID_ENTRIES 100
static void kvm_init_xsave(CPUX86State *env)
{
if (has_xsave2) {
@@ -1699,6 +1740,241 @@ static void kvm_init_nested_state(CPUX86State *env)
}
}
uint32_t kvm_x86_arch_cpuid(CPUX86State *env, struct kvm_cpuid_entry2 *entries,
uint32_t cpuid_i)
{
uint32_t limit, i, j;
uint32_t unused;
struct kvm_cpuid_entry2 *c;
if (cpuid_i > KVM_MAX_CPUID_ENTRIES) {
error_report("exceeded cpuid index (%d) for entries[]", cpuid_i);
abort();
}
cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused);
for (i = 0; i <= limit; i++) {
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
fprintf(stderr, "unsupported level value: 0x%x\n", limit);
abort();
}
c = &entries[cpuid_i++];
switch (i) {
case 2: {
/* Keep reading function 2 till all the input is received */
int times;
c->function = i;
c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC |
KVM_CPUID_FLAG_STATE_READ_NEXT;
cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
times = c->eax & 0xff;
for (j = 1; j < times; ++j) {
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
fprintf(stderr, "cpuid_data is full, no space for "
"cpuid(eax:2):eax & 0xf = 0x%x\n", times);
abort();
}
c = &entries[cpuid_i++];
c->function = i;
c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC;
cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
}
break;
}
case 0x1f:
if (env->nr_dies < 2) {
cpuid_i--;
break;
}
/* fallthrough */
case 4:
case 0xb:
case 0xd:
for (j = 0; ; j++) {
if (i == 0xd && j == 64) {
break;
}
c->function = i;
c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
c->index = j;
cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
if (i == 4 && c->eax == 0) {
break;
}
if (i == 0xb && !(c->ecx & 0xff00)) {
break;
}
if (i == 0x1f && !(c->ecx & 0xff00)) {
break;
}
if (i == 0xd && c->eax == 0) {
continue;
}
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
fprintf(stderr, "cpuid_data is full, no space for "
"cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
abort();
}
c = &entries[cpuid_i++];
}
break;
case 0x12:
for (j = 0; ; j++) {
c->function = i;
c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
c->index = j;
cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
if (j > 1 && (c->eax & 0xf) != 1) {
break;
}
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
fprintf(stderr, "cpuid_data is full, no space for "
"cpuid(eax:0x12,ecx:0x%x)\n", j);
abort();
}
c = &entries[cpuid_i++];
}
break;
case 0x7:
case 0x14:
case 0x1d:
case 0x1e: {
uint32_t times;
c->function = i;
c->index = 0;
c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
times = c->eax;
for (j = 1; j <= times; ++j) {
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
fprintf(stderr, "cpuid_data is full, no space for "
"cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
abort();
}
c = &entries[cpuid_i++];
c->function = i;
c->index = j;
c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
}
break;
}
default:
c->function = i;
c->flags = 0;
cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
if (!c->eax && !c->ebx && !c->ecx && !c->edx) {
/*
* KVM already returns all zeroes if a CPUID entry is missing,
* so we can omit it and avoid hitting KVM's 80-entry limit.
*/
cpuid_i--;
}
break;
}
}
if (limit >= 0x0a) {
uint32_t eax, edx;
cpu_x86_cpuid(env, 0x0a, 0, &eax, &unused, &unused, &edx);
has_architectural_pmu_version = eax & 0xff;
if (has_architectural_pmu_version > 0) {
num_architectural_pmu_gp_counters = (eax & 0xff00) >> 8;
/* Shouldn't be more than 32, since that's the number of bits
* available in EBX to tell us _which_ counters are available.
* Play it safe.
*/
if (num_architectural_pmu_gp_counters > MAX_GP_COUNTERS) {
num_architectural_pmu_gp_counters = MAX_GP_COUNTERS;
}
if (has_architectural_pmu_version > 1) {
num_architectural_pmu_fixed_counters = edx & 0x1f;
if (num_architectural_pmu_fixed_counters > MAX_FIXED_COUNTERS) {
num_architectural_pmu_fixed_counters = MAX_FIXED_COUNTERS;
}
}
}
}
cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused);
for (i = 0x80000000; i <= limit; i++) {
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
fprintf(stderr, "unsupported xlevel value: 0x%x\n", limit);
abort();
}
c = &entries[cpuid_i++];
switch (i) {
case 0x8000001d:
/* Query for all AMD cache information leaves */
for (j = 0; ; j++) {
c->function = i;
c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
c->index = j;
cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
if (c->eax == 0) {
break;
}
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
fprintf(stderr, "cpuid_data is full, no space for "
"cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
abort();
}
c = &entries[cpuid_i++];
}
break;
default:
c->function = i;
c->flags = 0;
cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
if (!c->eax && !c->ebx && !c->ecx && !c->edx) {
/*
* KVM already returns all zeroes if a CPUID entry is missing,
* so we can omit it and avoid hitting KVM's 80-entry limit.
*/
cpuid_i--;
}
break;
}
}
/* Call Centaur's CPUID instructions if they are supported. */
if (env->cpuid_xlevel2 > 0) {
cpu_x86_cpuid(env, 0xC0000000, 0, &limit, &unused, &unused, &unused);
for (i = 0xC0000000; i <= limit; i++) {
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
fprintf(stderr, "unsupported xlevel2 value: 0x%x\n", limit);
abort();
}
c = &entries[cpuid_i++];
c->function = i;
c->flags = 0;
cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
}
}
return cpuid_i;
}
int kvm_arch_init_vcpu(CPUState *cs)
{
struct {
@@ -1715,8 +1991,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
X86CPU *cpu = X86_CPU(cs);
CPUX86State *env = &cpu->env;
uint32_t limit, i, j, cpuid_i;
uint32_t cpuid_i;
uint32_t unused;
struct kvm_cpuid_entry2 *c;
uint32_t signature[3];
int kvm_base = KVM_CPUID_SIGNATURE;
@@ -1869,8 +2144,6 @@ int kvm_arch_init_vcpu(CPUState *cs)
c->edx = env->features[FEAT_KVM_HINTS];
}
cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused);
if (cpu->kvm_pv_enforce_cpuid) {
r = kvm_vcpu_enable_cap(cs, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, 0, 1);
if (r < 0) {
@@ -1881,224 +2154,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
}
}
for (i = 0; i <= limit; i++) {
cpuid_i = kvm_x86_arch_cpuid(env, cpuid_data.entries, cpuid_i);
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
fprintf(stderr, "unsupported level value: 0x%x\n", limit);
abort();
}
c = &cpuid_data.entries[cpuid_i++];
switch (i) {
case 2: {
/* Keep reading function 2 till all the input is received */
int times;
c->function = i;
c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC |
KVM_CPUID_FLAG_STATE_READ_NEXT;
cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
times = c->eax & 0xff;
for (j = 1; j < times; ++j) {
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
fprintf(stderr, "cpuid_data is full, no space for "
"cpuid(eax:2):eax & 0xf = 0x%x\n", times);
abort();
}
c = &cpuid_data.entries[cpuid_i++];
c->function = i;
c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC;
cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
}
break;
}
case 0x1f:
if (env->nr_dies < 2) {
cpuid_i--;
break;
}
/* fallthrough */
case 4:
case 0xb:
case 0xd:
for (j = 0; ; j++) {
if (i == 0xd && j == 64) {
break;
}
c->function = i;
c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
c->index = j;
cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
if (i == 4 && c->eax == 0) {
break;
}
if (i == 0xb && !(c->ecx & 0xff00)) {
break;
}
if (i == 0x1f && !(c->ecx & 0xff00)) {
break;
}
if (i == 0xd && c->eax == 0) {
continue;
}
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
fprintf(stderr, "cpuid_data is full, no space for "
"cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
abort();
}
c = &cpuid_data.entries[cpuid_i++];
}
break;
case 0x12:
for (j = 0; ; j++) {
c->function = i;
c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
c->index = j;
cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
if (j > 1 && (c->eax & 0xf) != 1) {
break;
}
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
fprintf(stderr, "cpuid_data is full, no space for "
"cpuid(eax:0x12,ecx:0x%x)\n", j);
abort();
}
c = &cpuid_data.entries[cpuid_i++];
}
break;
case 0x7:
case 0x14:
case 0x1d:
case 0x1e: {
uint32_t times;
c->function = i;
c->index = 0;
c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
times = c->eax;
for (j = 1; j <= times; ++j) {
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
fprintf(stderr, "cpuid_data is full, no space for "
"cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
abort();
}
c = &cpuid_data.entries[cpuid_i++];
c->function = i;
c->index = j;
c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
}
break;
}
default:
c->function = i;
c->flags = 0;
cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
if (!c->eax && !c->ebx && !c->ecx && !c->edx) {
/*
* KVM already returns all zeroes if a CPUID entry is missing,
* so we can omit it and avoid hitting KVM's 80-entry limit.
*/
cpuid_i--;
}
break;
}
}
if (limit >= 0x0a) {
uint32_t eax, edx;
cpu_x86_cpuid(env, 0x0a, 0, &eax, &unused, &unused, &edx);
has_architectural_pmu_version = eax & 0xff;
if (has_architectural_pmu_version > 0) {
num_architectural_pmu_gp_counters = (eax & 0xff00) >> 8;
/* Shouldn't be more than 32, since that's the number of bits
* available in EBX to tell us _which_ counters are available.
* Play it safe.
*/
if (num_architectural_pmu_gp_counters > MAX_GP_COUNTERS) {
num_architectural_pmu_gp_counters = MAX_GP_COUNTERS;
}
if (has_architectural_pmu_version > 1) {
num_architectural_pmu_fixed_counters = edx & 0x1f;
if (num_architectural_pmu_fixed_counters > MAX_FIXED_COUNTERS) {
num_architectural_pmu_fixed_counters = MAX_FIXED_COUNTERS;
}
}
}
}
cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused);
for (i = 0x80000000; i <= limit; i++) {
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
fprintf(stderr, "unsupported xlevel value: 0x%x\n", limit);
abort();
}
c = &cpuid_data.entries[cpuid_i++];
switch (i) {
case 0x8000001d:
/* Query for all AMD cache information leaves */
for (j = 0; ; j++) {
c->function = i;
c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
c->index = j;
cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
if (c->eax == 0) {
break;
}
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
fprintf(stderr, "cpuid_data is full, no space for "
"cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
abort();
}
c = &cpuid_data.entries[cpuid_i++];
}
break;
default:
c->function = i;
c->flags = 0;
cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
if (!c->eax && !c->ebx && !c->ecx && !c->edx) {
/*
* KVM already returns all zeroes if a CPUID entry is missing,
* so we can omit it and avoid hitting KVM's 80-entry limit.
*/
cpuid_i--;
}
break;
}
}
/* Call Centaur's CPUID instructions if they are supported. */
if (env->cpuid_xlevel2 > 0) {
cpu_x86_cpuid(env, 0xC0000000, 0, &limit, &unused, &unused, &unused);
for (i = 0xC0000000; i <= limit; i++) {
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
fprintf(stderr, "unsupported xlevel2 value: 0x%x\n", limit);
abort();
}
c = &cpuid_data.entries[cpuid_i++];
c->function = i;
c->flags = 0;
cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
}
}
cpuid_data.cpuid.nent = cpuid_i;
if (((env->cpuid_version >> 8)&0xF) >= 6
@@ -2224,6 +2280,15 @@ int kvm_arch_init_vcpu(CPUState *cs)
return r;
}
int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp)
{
if (is_tdx_vm()) {
return tdx_pre_create_vcpu(cpu, errp);
}
return 0;
}
int kvm_arch_destroy_vcpu(CPUState *cs)
{
X86CPU *cpu = X86_CPU(cs);
@@ -2520,21 +2585,14 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
Error *local_err = NULL;
/*
* Initialize SEV context, if required
*
* If no memory encryption is requested (ms->cgs == NULL) this is
* a no-op.
*
* It's also a no-op if a non-SEV confidential guest support
* mechanism is selected. SEV is the only mechanism available to
* select on x86 at present, so this doesn't arise, but if new
* mechanisms are supported in future (e.g. TDX), they'll need
* their own initialization either here or elsewhere.
* Initialize confidential guest (SEV/TDX) context, if required
*/
ret = sev_kvm_init(ms->cgs, &local_err);
if (ret < 0) {
error_report_err(local_err);
return ret;
if (ms->cgs) {
ret = confidential_guest_kvm_init(ms->cgs, &local_err);
if (ret < 0) {
error_report_err(local_err);
return ret;
}
}
has_xcrs = kvm_check_extension(s, KVM_CAP_XCRS);
@@ -2994,6 +3052,11 @@ void kvm_put_apicbase(X86CPU *cpu, uint64_t value)
{
int ret;
/* TODO: Allow accessing guest state for debug TDs. */
if (is_tdx_vm()) {
return;
}
ret = kvm_put_one_msr(cpu, MSR_IA32_APICBASE, value);
assert(ret == 1);
}
@@ -3212,32 +3275,34 @@ static void kvm_init_msrs(X86CPU *cpu)
CPUX86State *env = &cpu->env;
kvm_msr_buf_reset(cpu);
if (has_msr_arch_capabs) {
kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES,
env->features[FEAT_ARCH_CAPABILITIES]);
}
if (has_msr_core_capabs) {
kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY,
env->features[FEAT_CORE_CAPABILITY]);
}
if (has_msr_perf_capabs && cpu->enable_pmu) {
kvm_msr_entry_add_perf(cpu, env->features);
}
if (!is_tdx_vm()) {
if (has_msr_arch_capabs) {
kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES,
env->features[FEAT_ARCH_CAPABILITIES]);
}
if (has_msr_core_capabs) {
kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY,
env->features[FEAT_CORE_CAPABILITY]);
}
if (has_msr_perf_capabs && cpu->enable_pmu) {
kvm_msr_entry_add_perf(cpu, env->features);
}
/*
* Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but
* all kernels with MSR features should have them.
*/
if (kvm_feature_msrs && cpu_has_vmx(env)) {
kvm_msr_entry_add_vmx(cpu, env->features);
}
}
if (has_msr_ucode_rev) {
kvm_msr_entry_add(cpu, MSR_IA32_UCODE_REV, cpu->ucode_rev);
}
/*
* Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but
* all kernels with MSR features should have them.
*/
if (kvm_feature_msrs && cpu_has_vmx(env)) {
kvm_msr_entry_add_vmx(cpu, env->features);
}
assert(kvm_buf_set_msrs(cpu) == 0);
}
@@ -4555,6 +4620,11 @@ int kvm_arch_put_registers(CPUState *cpu, int level)
assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
/* TODO: Allow accessing guest state for debug TDs. */
if (is_tdx_vm()) {
return 0;
}
/*
* Put MSR_IA32_FEATURE_CONTROL first, this ensures the VM gets out of VMX
* root operation upon vCPU reset. kvm_put_msr_feature_control() should also
@@ -4655,6 +4725,12 @@ int kvm_arch_get_registers(CPUState *cs)
if (ret < 0) {
goto out;
}
/* TODO: Allow accessing guest state for debug TDs. */
if (is_tdx_vm()) {
return 0;
}
ret = kvm_getput_regs(cpu, 0);
if (ret < 0) {
goto out;
@@ -5355,6 +5431,14 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
ret = kvm_xen_handle_exit(cpu, &run->xen);
break;
#endif
case KVM_EXIT_TDX:
if (!is_tdx_vm()) {
error_report("KVM: get KVM_EXIT_TDX for a non-TDX VM.");
ret = -1;
break;
}
ret = tdx_handle_exit(cpu, &run->tdx);
break;
default:
fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
ret = -1;
@@ -5607,7 +5691,7 @@ bool kvm_has_waitpkg(void)
bool kvm_arch_cpu_check_are_resettable(void)
{
return !sev_es_enabled();
return !sev_es_enabled() && !is_tdx_vm();
}
#define ARCH_REQ_XCOMP_GUEST_PERM 0x1025


@@ -13,6 +13,8 @@
#include "sysemu/kvm.h" #include "sysemu/kvm.h"
#define KVM_MAX_CPUID_ENTRIES 100
#ifdef CONFIG_KVM #ifdef CONFIG_KVM
#define kvm_pit_in_kernel() \ #define kvm_pit_in_kernel() \
@@ -22,6 +24,9 @@
#define kvm_ioapic_in_kernel() \
(kvm_irqchip_in_kernel() && !kvm_irqchip_is_split())
uint32_t kvm_x86_arch_cpuid(CPUX86State *env, struct kvm_cpuid_entry2 *entries,
uint32_t cpuid_i);
#else
#define kvm_pit_in_kernel() 0
@@ -37,6 +42,7 @@ bool kvm_hv_vpindex_settable(void);
bool kvm_enable_sgx_provisioning(KVMState *s);
bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp);
int kvm_get_vm_type(MachineState *ms, const char *vm_type);
void kvm_arch_reset_vcpu(X86CPU *cs);
void kvm_arch_after_reset_vcpu(X86CPU *cpu);
void kvm_arch_do_init_vcpu(X86CPU *cs);


@@ -7,7 +7,7 @@ i386_kvm_ss.add(files(
i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files('xen-emu.c'))
i386_kvm_ss.add(when: 'CONFIG_SEV', if_false: files('sev-stub.c'))
i386_kvm_ss.add(when: 'CONFIG_TDX', if_true: files('tdx.c', 'tdx-quote-generator.c'), if_false: files('tdx-stub.c'))
i386_system_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c'), if_false: files('hyperv-stub.c'))


@@ -1,21 +0,0 @@
/*
* QEMU SEV stub
*
* Copyright Advanced Micro Devices 2018
*
* Authors:
* Brijesh Singh <brijesh.singh@amd.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*
*/
#include "qemu/osdep.h"
#include "sev.h"
int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
{
/* If we get here, cgs must be some non-SEV thing */
return 0;
}


@@ -0,0 +1,170 @@
/*
* QEMU TDX support
*
* Copyright Intel
*
* Author:
* Xiaoyao Li <xiaoyao.li@intel.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory
*
*/
#include "qemu/osdep.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "qapi/qapi-visit-sockets.h"
#include "tdx-quote-generator.h"
typedef struct TdxQuoteGeneratorClass {
DeviceClass parent_class;
} TdxQuoteGeneratorClass;
OBJECT_DEFINE_TYPE(TdxQuoteGenerator, tdx_quote_generator, TDX_QUOTE_GENERATOR, OBJECT)
static void tdx_quote_generator_finalize(Object *obj)
{
}
static void tdx_quote_generator_class_init(ObjectClass *oc, void *data)
{
}
static void tdx_quote_generator_init(Object *obj)
{
}
static void tdx_generate_quote_cleanup(struct tdx_generate_quote_task *task)
{
timer_del(&task->timer);
g_source_remove(task->watch);
qio_channel_close(QIO_CHANNEL(task->sioc), NULL);
object_unref(OBJECT(task->sioc));
/* Maintain the number of in-flight requests. */
qemu_mutex_lock(&task->quote_gen->lock);
task->quote_gen->num--;
qemu_mutex_unlock(&task->quote_gen->lock);
task->completion(task);
}
static gboolean tdx_get_quote_read(QIOChannel *ioc, GIOCondition condition,
gpointer opaque)
{
struct tdx_generate_quote_task *task = opaque;
Error *err = NULL;
int ret;
ret = qio_channel_read(ioc, task->receive_buf + task->receive_buf_received,
task->payload_len - task->receive_buf_received, &err);
if (ret < 0) {
if (ret == QIO_CHANNEL_ERR_BLOCK) {
return G_SOURCE_CONTINUE;
} else {
error_report_err(err);
task->status_code = TDX_VP_GET_QUOTE_ERROR;
goto end;
}
}
task->receive_buf_received += ret;
if (ret == 0 || task->receive_buf_received == task->payload_len) {
task->status_code = TDX_VP_GET_QUOTE_SUCCESS;
goto end;
}
return G_SOURCE_CONTINUE;
end:
tdx_generate_quote_cleanup(task);
return G_SOURCE_REMOVE;
}
static gboolean tdx_send_report(QIOChannel *ioc, GIOCondition condition,
gpointer opaque)
{
struct tdx_generate_quote_task *task = opaque;
Error *err = NULL;
int ret;
ret = qio_channel_write(ioc, task->send_data + task->send_data_sent,
task->send_data_size - task->send_data_sent, &err);
if (ret < 0) {
if (ret == QIO_CHANNEL_ERR_BLOCK) {
ret = 0;
} else {
error_report_err(err);
task->status_code = TDX_VP_GET_QUOTE_ERROR;
tdx_generate_quote_cleanup(task);
goto end;
}
}
task->send_data_sent += ret;
if (task->send_data_sent == task->send_data_size) {
task->watch = qio_channel_add_watch(QIO_CHANNEL(task->sioc), G_IO_IN,
tdx_get_quote_read, task, NULL);
goto end;
}
return G_SOURCE_CONTINUE;
end:
return G_SOURCE_REMOVE;
}
static void tdx_quote_generator_connected(QIOTask *qio_task, gpointer opaque)
{
struct tdx_generate_quote_task *task = opaque;
Error *err = NULL;
int ret;
ret = qio_task_propagate_error(qio_task, &err);
if (ret) {
error_report_err(err);
task->status_code = TDX_VP_GET_QUOTE_QGS_UNAVAILABLE;
tdx_generate_quote_cleanup(task);
return;
}
task->watch = qio_channel_add_watch(QIO_CHANNEL(task->sioc), G_IO_OUT,
tdx_send_report, task, NULL);
}
#define TRANSACTION_TIMEOUT 30000
static void getquote_expired(void *opaque)
{
struct tdx_generate_quote_task *task = opaque;
task->status_code = TDX_VP_GET_QUOTE_ERROR;
tdx_generate_quote_cleanup(task);
}
static void setup_get_quote_timer(struct tdx_generate_quote_task *task)
{
int64_t time;
timer_init_ms(&task->timer, QEMU_CLOCK_VIRTUAL, getquote_expired, task);
time = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
timer_mod(&task->timer, time + TRANSACTION_TIMEOUT);
}
void tdx_generate_quote(struct tdx_generate_quote_task *task)
{
struct TdxQuoteGenerator *quote_gen = task->quote_gen;
QIOChannelSocket *sioc;
sioc = qio_channel_socket_new();
task->sioc = sioc;
setup_get_quote_timer(task);
qio_channel_socket_connect_async(sioc, quote_gen->socket,
tdx_quote_generator_connected, task,
NULL, NULL);
}


@@ -0,0 +1,95 @@
#ifndef QEMU_I386_TDX_QUOTE_GENERATOR_H
#define QEMU_I386_TDX_QUOTE_GENERATOR_H
#include "qom/object_interfaces.h"
#include "io/channel-socket.h"
#include "exec/hwaddr.h"
/* tdx quote generation */
struct TdxQuoteGenerator {
Object parent_obj;
int num;
SocketAddress *socket;
QemuMutex lock;
};
#define TYPE_TDX_QUOTE_GENERATOR "tdx-quote-generator"
OBJECT_DECLARE_SIMPLE_TYPE(TdxQuoteGenerator, TDX_QUOTE_GENERATOR)
#define TDX_GET_QUOTE_STRUCTURE_VERSION 1ULL
#define TDX_VP_GET_QUOTE_SUCCESS 0ULL
#define TDX_VP_GET_QUOTE_IN_FLIGHT (-1ULL)
#define TDX_VP_GET_QUOTE_ERROR 0x8000000000000000ULL
#define TDX_VP_GET_QUOTE_QGS_UNAVAILABLE 0x8000000000000001ULL
/* Limit to avoid resource starvation. */
#define TDX_GET_QUOTE_MAX_BUF_LEN (128 * 1024)
#define TDX_MAX_GET_QUOTE_REQUEST 16
#define TDX_GET_QUOTE_HDR_SIZE 24
/* Format of pages shared with guest. */
struct tdx_get_quote_header {
/* Format version: must be 1 in little endian. */
uint64_t structure_version;
/*
* GetQuote status code in little endian:
* Guest must set error_code to 0 to avoid information leak.
* Qemu sets this before interrupting guest.
*/
uint64_t error_code;
/*
* in-message size in little endian: The message will follow this header.
* The in-message will be sent to QGS.
*/
uint32_t in_len;
/*
* out-message size in little endian:
* On request, out_len must be zero to avoid information leak.
* On return, message size from QGS. Qemu overwrites this field.
* The message follows this header. The in-message is overwritten.
*/
uint32_t out_len;
/*
* Message buffer follows.
* Guest sets the message that will be sent to QGS. If out_len > in_len, guest
* should zero remaining buffer to avoid information leak.
* Qemu overwrites this buffer with a message returned from QGS.
*/
};
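A hypothetical guest-side sketch of preparing this shared page before issuing TDG.VP.VMCALL<GetQuote>; the 'report' identifiers are illustrative, and explicit little-endian stores are elided by assuming a little-endian guest:

#include <stdint.h>
#include <string.h>

/* Fill the header per the layout above; the request message (a
 * TDREPORT) is copied immediately after the header. */
static void prepare_get_quote(struct tdx_get_quote_header *hdr,
                              const void *report, uint32_t report_len)
{
    hdr->structure_version = TDX_GET_QUOTE_STRUCTURE_VERSION;
    hdr->error_code = 0;      /* must be zero on request (no info leak) */
    hdr->in_len = report_len; /* in-message size */
    hdr->out_len = 0;         /* must be zero on request */
    memcpy(hdr + 1, report, report_len);
}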
struct tdx_generate_quote_task {
hwaddr buf_gpa;
hwaddr payload_gpa;
uint64_t payload_len;
char *send_data;
uint64_t send_data_size;
uint64_t send_data_sent;
char *receive_buf;
uint64_t receive_buf_received;
uint64_t status_code;
struct tdx_get_quote_header hdr;
QIOChannelSocket *sioc;
guint watch;
QEMUTimer timer;
struct TdxQuoteGenerator *quote_gen;
void (*completion)(struct tdx_generate_quote_task *task);
};
void tdx_generate_quote(struct tdx_generate_quote_task *task);
#endif /* QEMU_I386_TDX_QUOTE_GENERATOR_H */


@@ -0,0 +1,18 @@
#include "qemu/osdep.h"
#include "tdx.h"
int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
{
return -EINVAL;
}
int tdx_parse_tdvf(void *flash_ptr, int size)
{
return -EINVAL;
}
int tdx_handle_exit(X86CPU *cpu, struct kvm_tdx_exit *tdx_exit)
{
return -EINVAL;
}

target/i386/kvm/tdx.c (new file, 1371 lines)

File diff suppressed because it is too large

target/i386/kvm/tdx.h (new file, 81 lines)

@@ -0,0 +1,81 @@
#ifndef QEMU_I386_TDX_H
#define QEMU_I386_TDX_H
#ifndef CONFIG_USER_ONLY
#include CONFIG_DEVICES /* CONFIG_TDX */
#endif
#include "exec/confidential-guest-support.h"
#include "hw/i386/tdvf.h"
#include "sysemu/kvm.h"
#include "tdx-quote-generator.h"
#define TYPE_TDX_GUEST "tdx-guest"
#define TDX_GUEST(obj) OBJECT_CHECK(TdxGuest, (obj), TYPE_TDX_GUEST)
typedef struct TdxGuestClass {
ConfidentialGuestSupportClass parent_class;
} TdxGuestClass;
#define TDG_VP_VMCALL_MAP_GPA 0x10001ULL
#define TDG_VP_VMCALL_GET_QUOTE 0x10002ULL
#define TDG_VP_VMCALL_REPORT_FATAL_ERROR 0x10003ULL
#define TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT 0x10004ULL
#define TDG_VP_VMCALL_SUCCESS 0x0000000000000000ULL
#define TDG_VP_VMCALL_RETRY 0x0000000000000001ULL
#define TDG_VP_VMCALL_INVALID_OPERAND 0x8000000000000000ULL
#define TDG_VP_VMCALL_GPA_INUSE 0x8000000000000001ULL
#define TDG_VP_VMCALL_ALIGN_ERROR 0x8000000000000002ULL
enum TdxRamType {
TDX_RAM_UNACCEPTED,
TDX_RAM_ADDED,
};
typedef struct TdxRamEntry {
uint64_t address;
uint64_t length;
enum TdxRamType type;
} TdxRamEntry;
typedef struct TdxGuest {
ConfidentialGuestSupport parent_obj;
QemuMutex lock;
bool initialized;
uint64_t attributes; /* TD attributes */
char *mrconfigid; /* base64 encoded sha384 digest */
char *mrowner; /* base64 encoded sha384 digest */
char *mrownerconfig; /* base64 encoded sha384 digest */
MemoryRegion *tdvf_mr;
TdxFirmware tdvf;
uint32_t nr_ram_entries;
TdxRamEntry *ram_entries;
/* runtime state */
uint32_t event_notify_vector;
uint32_t event_notify_apicid;
/* GetQuote */
TdxQuoteGenerator *quote_generator;
} TdxGuest;
#ifdef CONFIG_TDX
bool is_tdx_vm(void);
#else
#define is_tdx_vm() 0
#endif /* CONFIG_TDX */
void tdx_get_supported_cpuid(uint32_t function, uint32_t index, int reg,
uint32_t *ret);
int tdx_pre_create_vcpu(CPUState *cpu, Error **errp);
void tdx_set_tdvf_region(MemoryRegion *tdvf_mr);
int tdx_parse_tdvf(void *flash_ptr, int size);
int tdx_handle_exit(X86CPU *cpu, struct kvm_tdx_exit *tdx_exit);
#endif /* QEMU_I386_TDX_H */


@@ -353,63 +353,6 @@ static void sev_guest_set_kernel_hashes(Object *obj, bool value, Error **errp)
sev->kernel_hashes = value;
}
static void
sev_guest_class_init(ObjectClass *oc, void *data)
{
object_class_property_add_str(oc, "sev-device",
sev_guest_get_sev_device,
sev_guest_set_sev_device);
object_class_property_set_description(oc, "sev-device",
"SEV device to use");
object_class_property_add_str(oc, "dh-cert-file",
sev_guest_get_dh_cert_file,
sev_guest_set_dh_cert_file);
object_class_property_set_description(oc, "dh-cert-file",
"guest owners DH certificate (encoded with base64)");
object_class_property_add_str(oc, "session-file",
sev_guest_get_session_file,
sev_guest_set_session_file);
object_class_property_set_description(oc, "session-file",
"guest owners session parameters (encoded with base64)");
object_class_property_add_bool(oc, "kernel-hashes",
sev_guest_get_kernel_hashes,
sev_guest_set_kernel_hashes);
object_class_property_set_description(oc, "kernel-hashes",
"add kernel hashes to guest firmware for measured Linux boot");
}
static void
sev_guest_instance_init(Object *obj)
{
SevGuestState *sev = SEV_GUEST(obj);
sev->sev_device = g_strdup(DEFAULT_SEV_DEVICE);
sev->policy = DEFAULT_GUEST_POLICY;
object_property_add_uint32_ptr(obj, "policy", &sev->policy,
OBJ_PROP_FLAG_READWRITE);
object_property_add_uint32_ptr(obj, "handle", &sev->handle,
OBJ_PROP_FLAG_READWRITE);
object_property_add_uint32_ptr(obj, "cbitpos", &sev->cbitpos,
OBJ_PROP_FLAG_READWRITE);
object_property_add_uint32_ptr(obj, "reduced-phys-bits",
&sev->reduced_phys_bits,
OBJ_PROP_FLAG_READWRITE);
}
/* sev guest info */
static const TypeInfo sev_guest_info = {
.parent = TYPE_CONFIDENTIAL_GUEST_SUPPORT,
.name = TYPE_SEV_GUEST,
.instance_size = sizeof(SevGuestState),
.instance_finalize = sev_guest_finalize,
.class_init = sev_guest_class_init,
.instance_init = sev_guest_instance_init,
.interfaces = (InterfaceInfo[]) {
{ TYPE_USER_CREATABLE },
{ }
}
};
bool
sev_enabled(void)
{
@@ -906,7 +849,7 @@ sev_vm_state_change(void *opaque, bool running, RunState state)
}
}
int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
{
SevGuestState *sev
= (SevGuestState *)object_dynamic_cast(OBJECT(cgs), TYPE_SEV_GUEST);
@@ -1383,6 +1326,67 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp)
return ret;
}
static void
sev_guest_class_init(ObjectClass *oc, void *data)
{
ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc);
klass->kvm_init = sev_kvm_init;
object_class_property_add_str(oc, "sev-device",
sev_guest_get_sev_device,
sev_guest_set_sev_device);
object_class_property_set_description(oc, "sev-device",
"SEV device to use");
object_class_property_add_str(oc, "dh-cert-file",
sev_guest_get_dh_cert_file,
sev_guest_set_dh_cert_file);
object_class_property_set_description(oc, "dh-cert-file",
"guest owners DH certificate (encoded with base64)");
object_class_property_add_str(oc, "session-file",
sev_guest_get_session_file,
sev_guest_set_session_file);
object_class_property_set_description(oc, "session-file",
"guest owners session parameters (encoded with base64)");
object_class_property_add_bool(oc, "kernel-hashes",
sev_guest_get_kernel_hashes,
sev_guest_set_kernel_hashes);
object_class_property_set_description(oc, "kernel-hashes",
"add kernel hashes to guest firmware for measured Linux boot");
}
static void
sev_guest_instance_init(Object *obj)
{
SevGuestState *sev = SEV_GUEST(obj);
sev->sev_device = g_strdup(DEFAULT_SEV_DEVICE);
sev->policy = DEFAULT_GUEST_POLICY;
object_property_add_uint32_ptr(obj, "policy", &sev->policy,
OBJ_PROP_FLAG_READWRITE);
object_property_add_uint32_ptr(obj, "handle", &sev->handle,
OBJ_PROP_FLAG_READWRITE);
object_property_add_uint32_ptr(obj, "cbitpos", &sev->cbitpos,
OBJ_PROP_FLAG_READWRITE);
object_property_add_uint32_ptr(obj, "reduced-phys-bits",
&sev->reduced_phys_bits,
OBJ_PROP_FLAG_READWRITE);
}
/* sev guest info */
static const TypeInfo sev_guest_info = {
.parent = TYPE_CONFIDENTIAL_GUEST_SUPPORT,
.name = TYPE_SEV_GUEST,
.instance_size = sizeof(SevGuestState),
.instance_finalize = sev_guest_finalize,
.class_init = sev_guest_class_init,
.instance_init = sev_guest_instance_init,
.interfaces = (InterfaceInfo[]) {
{ TYPE_USER_CREATABLE },
{ }
}
};
static void
sev_register_types(void)
{


@@ -57,6 +57,4 @@ int sev_inject_launch_secret(const char *hdr, const char *secret,
int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size);
void sev_es_set_reset_vector(CPUState *cpu);
int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp);
#endif


@@ -340,6 +340,11 @@ int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
return 0;
}
if (!kvm_enabled()) {
error_setg(errp, "Protected Virtualization requires KVM");
return -1;
}
if (!s390_has_feat(S390_FEAT_UNPACK)) {
error_setg(errp,
"CPU model does not support Protected Virtualization");
@@ -364,6 +369,9 @@ OBJECT_DEFINE_TYPE_WITH_INTERFACES(S390PVGuest,
static void s390_pv_guest_class_init(ObjectClass *oc, void *data)
{
ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc);
klass->kvm_init = s390_pv_kvm_init;
}
static void s390_pv_guest_init(Object *obj)


@@ -80,18 +80,4 @@ static inline int kvm_s390_dump_mem_state(uint64_t addr, size_t len,
static inline int kvm_s390_dump_completion_data(void *buff) { return 0; }
#endif /* CONFIG_KVM */
int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp);
static inline int s390_pv_init(ConfidentialGuestSupport *cgs, Error **errp)
{
if (!cgs) {
return 0;
}
if (kvm_enabled()) {
return s390_pv_kvm_init(cgs, errp);
}
error_setg(errp, "Protected Virtualization requires KVM");
return -1;
}
#endif /* HW_S390_PV_H */