Compare commits
69 Commits
factory-tm
...
tdx-qemu-u
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
97d7eee445 | ||
|
|
2f2df9a3bc | ||
|
|
dc149a2dab | ||
|
|
dd3280e555 | ||
|
|
48745be408 | ||
|
|
c5fe579baa | ||
|
|
a1827de88d | ||
|
|
5cdcac0ad8 | ||
|
|
7887fbc910 | ||
|
|
895c595bc2 | ||
|
|
4736232354 | ||
|
|
56d60881a9 | ||
|
|
83568ce7cf | ||
|
|
c19eb0bf28 | ||
|
|
cf78496f76 | ||
|
|
ac4bf9e0c9 | ||
|
|
1f0e164403 | ||
|
|
b229d5fbee | ||
|
|
74fd273f24 | ||
|
|
a17b2fc06d | ||
|
|
4b15af10b9 | ||
|
|
db8c6a73df | ||
|
|
89daa71741 | ||
|
|
3e682e2ecd | ||
|
|
86303dc132 | ||
|
|
13cc418776 | ||
|
|
0ca018b4ae | ||
|
|
3993dcd46f | ||
|
|
f55358924b | ||
|
|
b2caa23cb6 | ||
|
|
11df3f7c04 | ||
|
|
340f514448 | ||
|
|
e49e612f39 | ||
|
|
24593b3808 | ||
|
|
800c010a8e | ||
|
|
7be7fc35fd | ||
|
|
2d852d9402 | ||
|
|
d2dd1f08ed | ||
|
|
4dc0aabacb | ||
|
|
e930ce95da | ||
|
|
f779417259 | ||
|
|
47fe7e2994 | ||
|
|
57e6e2387e | ||
|
|
89f9a3eb99 | ||
|
|
fd43407abb | ||
|
|
f59fe9c167 | ||
|
|
2768bbfdd4 | ||
|
|
6819098716 | ||
|
|
41a55b03b3 | ||
|
|
d64d0075c9 | ||
|
|
ae72789e97 | ||
|
|
416483b146 | ||
|
|
9dc75f6117 | ||
|
|
cfd98e2a03 | ||
|
|
01b3347bd7 | ||
|
|
b1c3368c4b | ||
|
|
018b1b64cf | ||
|
|
631c138264 | ||
|
|
a9c371e165 | ||
|
|
cfef6eb7f5 | ||
|
|
0b4947a9c9 | ||
|
|
6eab5a906b | ||
|
|
ae7f7e366d | ||
|
|
01ac1b1852 | ||
|
|
bd8954a319 | ||
|
|
50f8285f8a | ||
|
|
9b465f8e78 | ||
|
|
e4fd888eb8 | ||
|
|
57f5e8b888 |
@@ -91,6 +91,8 @@ bool kvm_msi_use_devid;
|
||||
static bool kvm_has_guest_debug;
|
||||
static int kvm_sstep_flags;
|
||||
static bool kvm_immediate_exit;
|
||||
static bool kvm_guest_memfd_supported;
|
||||
static uint64_t kvm_supported_memory_attributes;
|
||||
static hwaddr kvm_max_slot_size = ~0;
|
||||
|
||||
static const KVMCapabilityInfo kvm_required_capabilites[] = {
|
||||
@@ -282,34 +284,69 @@ int kvm_physical_memory_addr_from_host(KVMState *s, void *ram,
|
||||
static int kvm_set_user_memory_region(KVMMemoryListener *kml, KVMSlot *slot, bool new)
|
||||
{
|
||||
KVMState *s = kvm_state;
|
||||
struct kvm_userspace_memory_region mem;
|
||||
struct kvm_userspace_memory_region2 mem;
|
||||
static int cap_user_memory2 = -1;
|
||||
int ret;
|
||||
|
||||
if (cap_user_memory2 == -1) {
|
||||
cap_user_memory2 = kvm_check_extension(s, KVM_CAP_USER_MEMORY2);
|
||||
}
|
||||
|
||||
if (!cap_user_memory2 && slot->guest_memfd >= 0) {
|
||||
error_report("%s, KVM doesn't support KVM_CAP_USER_MEMORY2,"
|
||||
" which is required by guest memfd!", __func__);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
mem.slot = slot->slot | (kml->as_id << 16);
|
||||
mem.guest_phys_addr = slot->start_addr;
|
||||
mem.userspace_addr = (unsigned long)slot->ram;
|
||||
mem.flags = slot->flags;
|
||||
mem.guest_memfd = slot->guest_memfd;
|
||||
mem.guest_memfd_offset = slot->guest_memfd_offset;
|
||||
|
||||
if (slot->memory_size && !new && (mem.flags ^ slot->old_flags) & KVM_MEM_READONLY) {
|
||||
/* Set the slot size to 0 before setting the slot to the desired
|
||||
* value. This is needed based on KVM commit 75d61fbc. */
|
||||
mem.memory_size = 0;
|
||||
ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
|
||||
|
||||
if (cap_user_memory2) {
|
||||
ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION2, &mem);
|
||||
} else {
|
||||
ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
|
||||
}
|
||||
if (ret < 0) {
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
mem.memory_size = slot->memory_size;
|
||||
ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
|
||||
if (cap_user_memory2) {
|
||||
ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION2, &mem);
|
||||
} else {
|
||||
ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
|
||||
}
|
||||
slot->old_flags = mem.flags;
|
||||
err:
|
||||
trace_kvm_set_user_memory(mem.slot, mem.flags, mem.guest_phys_addr,
|
||||
mem.memory_size, mem.userspace_addr, ret);
|
||||
trace_kvm_set_user_memory(mem.slot >> 16, (uint16_t)mem.slot, mem.flags,
|
||||
mem.guest_phys_addr, mem.memory_size,
|
||||
mem.userspace_addr, mem.guest_memfd,
|
||||
mem.guest_memfd_offset, ret);
|
||||
if (ret < 0) {
|
||||
error_report("%s: KVM_SET_USER_MEMORY_REGION failed, slot=%d,"
|
||||
" start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s",
|
||||
__func__, mem.slot, slot->start_addr,
|
||||
(uint64_t)mem.memory_size, strerror(errno));
|
||||
if (cap_user_memory2) {
|
||||
error_report("%s: KVM_SET_USER_MEMORY_REGION2 failed, slot=%d,"
|
||||
" start=0x%" PRIx64 ", size=0x%" PRIx64 ","
|
||||
" flags=0x%" PRIx32 ", guest_memfd=%" PRId32 ","
|
||||
" guest_memfd_offset=0x%" PRIx64 ": %s",
|
||||
__func__, mem.slot, slot->start_addr,
|
||||
(uint64_t)mem.memory_size, mem.flags,
|
||||
mem.guest_memfd, (uint64_t)mem.guest_memfd_offset,
|
||||
strerror(errno));
|
||||
} else {
|
||||
error_report("%s: KVM_SET_USER_MEMORY_REGION failed, slot=%d,"
|
||||
" start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s",
|
||||
__func__, mem.slot, slot->start_addr,
|
||||
(uint64_t)mem.memory_size, strerror(errno));
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
@@ -381,6 +418,11 @@ static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id)
|
||||
return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id);
|
||||
}
|
||||
|
||||
int __attribute__ ((weak)) kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_init_vcpu(CPUState *cpu, Error **errp)
|
||||
{
|
||||
KVMState *s = kvm_state;
|
||||
@@ -389,15 +431,27 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
|
||||
|
||||
trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
|
||||
|
||||
/*
|
||||
* tdx_pre_create_vcpu() may call cpu_x86_cpuid(). It in turn may call
|
||||
* kvm_vm_ioctl(). Set cpu->kvm_state in advance to avoid NULL pointer
|
||||
* dereference.
|
||||
*/
|
||||
cpu->kvm_state = s;
|
||||
ret = kvm_arch_pre_create_vcpu(cpu, errp);
|
||||
if (ret < 0) {
|
||||
cpu->kvm_state = NULL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu));
|
||||
if (ret < 0) {
|
||||
error_setg_errno(errp, -ret, "kvm_init_vcpu: kvm_get_vcpu failed (%lu)",
|
||||
kvm_arch_vcpu_id(cpu));
|
||||
cpu->kvm_state = NULL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
cpu->kvm_fd = ret;
|
||||
cpu->kvm_state = s;
|
||||
cpu->vcpu_dirty = true;
|
||||
cpu->dirty_pages = 0;
|
||||
cpu->throttle_us_per_full = 0;
|
||||
@@ -464,6 +518,9 @@ static int kvm_mem_flags(MemoryRegion *mr)
|
||||
if (readonly && kvm_readonly_mem_allowed) {
|
||||
flags |= KVM_MEM_READONLY;
|
||||
}
|
||||
if (memory_region_has_guest_memfd(mr)) {
|
||||
flags |= KVM_MEM_GUEST_MEMFD;
|
||||
}
|
||||
return flags;
|
||||
}
|
||||
|
||||
@@ -1265,6 +1322,46 @@ void kvm_set_max_memslot_size(hwaddr max_slot_size)
|
||||
kvm_max_slot_size = max_slot_size;
|
||||
}
|
||||
|
||||
static int kvm_set_memory_attributes(hwaddr start, hwaddr size, uint64_t attr)
|
||||
{
|
||||
struct kvm_memory_attributes attrs;
|
||||
int r;
|
||||
|
||||
if (kvm_supported_memory_attributes == 0) {
|
||||
error_report("No memory attribute supported by KVM\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if ((attr & kvm_supported_memory_attributes) != attr) {
|
||||
error_report("memory attribute 0x%lx not supported by KVM,"
|
||||
" supported bits are 0x%lx\n",
|
||||
attr, kvm_supported_memory_attributes);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
attrs.attributes = attr;
|
||||
attrs.address = start;
|
||||
attrs.size = size;
|
||||
attrs.flags = 0;
|
||||
|
||||
r = kvm_vm_ioctl(kvm_state, KVM_SET_MEMORY_ATTRIBUTES, &attrs);
|
||||
if (r) {
|
||||
error_report("failed to set memory (0x%lx+%#zx) with attr 0x%lx error '%s'",
|
||||
start, size, attr, strerror(errno));
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
int kvm_set_memory_attributes_private(hwaddr start, hwaddr size)
|
||||
{
|
||||
return kvm_set_memory_attributes(start, size, KVM_MEMORY_ATTRIBUTE_PRIVATE);
|
||||
}
|
||||
|
||||
int kvm_set_memory_attributes_shared(hwaddr start, hwaddr size)
|
||||
{
|
||||
return kvm_set_memory_attributes(start, size, 0);
|
||||
}
|
||||
|
||||
/* Called with KVMMemoryListener.slots_lock held */
|
||||
static void kvm_set_phys_mem(KVMMemoryListener *kml,
|
||||
MemoryRegionSection *section, bool add)
|
||||
@@ -1361,6 +1458,9 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
|
||||
mem->ram_start_offset = ram_start_offset;
|
||||
mem->ram = ram;
|
||||
mem->flags = kvm_mem_flags(mr);
|
||||
mem->guest_memfd = mr->ram_block->guest_memfd;
|
||||
mem->guest_memfd_offset = (uint8_t*)ram - mr->ram_block->host;
|
||||
|
||||
kvm_slot_init_dirty_bitmap(mem);
|
||||
err = kvm_set_user_memory_region(kml, mem, true);
|
||||
if (err) {
|
||||
@@ -1368,6 +1468,16 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
|
||||
strerror(-err));
|
||||
abort();
|
||||
}
|
||||
|
||||
if (memory_region_has_guest_memfd(mr)) {
|
||||
err = kvm_set_memory_attributes_private(start_addr, slot_size);
|
||||
if (err) {
|
||||
error_report("%s: failed to set memory attribute private: %s\n",
|
||||
__func__, strerror(-err));
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
start_addr += slot_size;
|
||||
ram_start_offset += slot_size;
|
||||
ram += slot_size;
|
||||
@@ -2395,6 +2505,11 @@ static int kvm_init(MachineState *ms)
|
||||
}
|
||||
s->as = g_new0(struct KVMAs, s->nr_as);
|
||||
|
||||
kvm_guest_memfd_supported = kvm_check_extension(s, KVM_CAP_GUEST_MEMFD);
|
||||
|
||||
ret = kvm_check_extension(s, KVM_CAP_MEMORY_ATTRIBUTES);
|
||||
kvm_supported_memory_attributes = ret > 0 ? ret : 0;
|
||||
|
||||
if (object_property_find(OBJECT(current_machine), "kvm-type")) {
|
||||
g_autofree char *kvm_type = object_property_get_str(OBJECT(current_machine),
|
||||
"kvm-type",
|
||||
@@ -2815,6 +2930,95 @@ static void kvm_eat_signals(CPUState *cpu)
|
||||
} while (sigismember(&chkset, SIG_IPI));
|
||||
}
|
||||
|
||||
int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private)
|
||||
{
|
||||
MemoryRegionSection section;
|
||||
ram_addr_t offset;
|
||||
MemoryRegion *mr;
|
||||
RAMBlock *rb;
|
||||
void *addr;
|
||||
int ret = -1;
|
||||
|
||||
trace_kvm_convert_memory(start, size, to_private ? "shared_to_private" : "private_to_shared");
|
||||
|
||||
if (!QEMU_PTR_IS_ALIGNED(start, qemu_host_page_size) ||
|
||||
!QEMU_PTR_IS_ALIGNED(size, qemu_host_page_size)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!size) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
section = memory_region_find(get_system_memory(), start, size);
|
||||
mr = section.mr;
|
||||
if (!mr) {
|
||||
/*
|
||||
* Ignore converting non-assigned region to shared.
|
||||
*
|
||||
* TDX requires vMMIO region to be shared to inject #VE to guest.
|
||||
* OVMF issues conservatively MapGPA(shared) on 32bit PCI MMIO region,
|
||||
* and vIO-APIC 0xFEC00000 4K page.
|
||||
* OVMF assigns 32bit PCI MMIO region to
|
||||
* [top of low memory: typically 2GB=0xC000000, 0xFC00000)
|
||||
*/
|
||||
if (!to_private) {
|
||||
return 0;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (memory_region_has_guest_memfd(mr)) {
|
||||
if (to_private) {
|
||||
ret = kvm_set_memory_attributes_private(start, size);
|
||||
} else {
|
||||
ret = kvm_set_memory_attributes_shared(start, size);
|
||||
}
|
||||
|
||||
if (ret) {
|
||||
memory_region_unref(section.mr);
|
||||
return ret;
|
||||
}
|
||||
|
||||
addr = memory_region_get_ram_ptr(mr) + section.offset_within_region;
|
||||
rb = qemu_ram_block_from_host(addr, false, &offset);
|
||||
|
||||
if (to_private) {
|
||||
if (rb->page_size != qemu_host_page_size) {
|
||||
/*
|
||||
* shared memory is back'ed by hugetlb, which is supposed to be
|
||||
* pre-allocated and doesn't need to be discarded
|
||||
*/
|
||||
return 0;
|
||||
} else {
|
||||
ret = ram_block_discard_range(rb, offset, size);
|
||||
}
|
||||
} else {
|
||||
ret = ram_block_discard_guest_memfd_range(rb, offset, size);
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* Because vMMIO region must be shared, guest TD may convert vMMIO
|
||||
* region to shared explicitly. Don't complain such case. See
|
||||
* memory_region_type() for checking if the region is MMIO region.
|
||||
*/
|
||||
if (!to_private &&
|
||||
!memory_region_is_ram(mr) &&
|
||||
!memory_region_is_ram_device(mr) &&
|
||||
!memory_region_is_rom(mr) &&
|
||||
!memory_region_is_romd(mr)) {
|
||||
ret = 0;
|
||||
} else {
|
||||
error_report("Convert non guest_memfd backed memory region "
|
||||
"(0x%"HWADDR_PRIx" ,+ 0x%"HWADDR_PRIx") to %s",
|
||||
start, size, to_private ? "private" : "shared");
|
||||
}
|
||||
}
|
||||
|
||||
memory_region_unref(section.mr);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int kvm_cpu_exec(CPUState *cpu)
|
||||
{
|
||||
struct kvm_run *run = cpu->kvm_run;
|
||||
@@ -2882,18 +3086,20 @@ int kvm_cpu_exec(CPUState *cpu)
|
||||
ret = EXCP_INTERRUPT;
|
||||
break;
|
||||
}
|
||||
fprintf(stderr, "error: kvm run failed %s\n",
|
||||
strerror(-run_ret));
|
||||
if (!(run_ret == -EFAULT && run->exit_reason == KVM_EXIT_MEMORY_FAULT)) {
|
||||
fprintf(stderr, "error: kvm run failed %s\n",
|
||||
strerror(-run_ret));
|
||||
#ifdef TARGET_PPC
|
||||
if (run_ret == -EBUSY) {
|
||||
fprintf(stderr,
|
||||
"This is probably because your SMT is enabled.\n"
|
||||
"VCPU can only run on primary threads with all "
|
||||
"secondary threads offline.\n");
|
||||
}
|
||||
if (run_ret == -EBUSY) {
|
||||
fprintf(stderr,
|
||||
"This is probably because your SMT is enabled.\n"
|
||||
"VCPU can only run on primary threads with all "
|
||||
"secondary threads offline.\n");
|
||||
}
|
||||
#endif
|
||||
ret = -1;
|
||||
break;
|
||||
ret = -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
trace_kvm_run_exit(cpu->cpu_index, run->exit_reason);
|
||||
@@ -2976,6 +3182,16 @@ int kvm_cpu_exec(CPUState *cpu)
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case KVM_EXIT_MEMORY_FAULT:
|
||||
if (run->memory_fault.flags & ~KVM_MEMORY_EXIT_FLAG_PRIVATE) {
|
||||
error_report("KVM_EXIT_MEMORY_FAULT: Unknown flag 0x%" PRIx64,
|
||||
(uint64_t)run->memory_fault.flags);
|
||||
ret = -1;
|
||||
break;
|
||||
}
|
||||
ret = kvm_convert_memory(run->memory_fault.gpa, run->memory_fault.size,
|
||||
run->memory_fault.flags & KVM_MEMORY_EXIT_FLAG_PRIVATE);
|
||||
break;
|
||||
default:
|
||||
ret = kvm_arch_handle_exit(cpu, run);
|
||||
break;
|
||||
@@ -4094,3 +4310,25 @@ void query_stats_schemas_cb(StatsSchemaList **result, Error **errp)
|
||||
query_stats_schema_vcpu(first_cpu, &stats_args);
|
||||
}
|
||||
}
|
||||
|
||||
int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp)
|
||||
{
|
||||
int fd;
|
||||
struct kvm_create_guest_memfd guest_memfd = {
|
||||
.size = size,
|
||||
.flags = flags,
|
||||
};
|
||||
|
||||
if (!kvm_guest_memfd_supported) {
|
||||
error_setg(errp, "KVM doesn't support guest memfd\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_GUEST_MEMFD, &guest_memfd);
|
||||
if (fd < 0) {
|
||||
error_setg_errno(errp, errno, "Error creating kvm guest memfd");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return fd;
|
||||
}
|
||||
|
||||
@@ -15,7 +15,7 @@ kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d"
|
||||
kvm_irqchip_release_virq(int virq) "virq %d"
|
||||
kvm_set_ioeventfd_mmio(int fd, uint64_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%" PRIx64 " val=0x%x assign: %d size: %d match: %d"
|
||||
kvm_set_ioeventfd_pio(int fd, uint16_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%x val=0x%x assign: %d size: %d match: %d"
|
||||
kvm_set_user_memory(uint32_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, int ret) "Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " ret=%d"
|
||||
kvm_set_user_memory(uint16_t as, uint16_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, uint32_t fd, uint64_t fd_offset, int ret) "AddrSpace#%d Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " guest_memfd=%d" " guest_memfd_offset=0x%" PRIx64 " ret=%d"
|
||||
kvm_clear_dirty_log(uint32_t slot, uint64_t start, uint32_t size) "slot#%"PRId32" start 0x%"PRIx64" size 0x%"PRIx32
|
||||
kvm_resample_fd_notify(int gsi) "gsi %d"
|
||||
kvm_dirty_ring_full(int id) "vcpu %d"
|
||||
@@ -31,3 +31,4 @@ kvm_cpu_exec(void) ""
|
||||
kvm_interrupt_exit_request(void) ""
|
||||
kvm_io_window_exit(void) ""
|
||||
kvm_run_exit_system_event(int cpu_index, uint32_t event_type) "cpu_index %d, system_even_type %"PRIu32
|
||||
kvm_convert_memory(uint64_t start, uint64_t size, const char *msg) "start 0x%" PRIx64 " size 0x%" PRIx64 " %s"
|
||||
|
||||
@@ -129,3 +129,8 @@ bool kvm_hwpoisoned_mem(void)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp)
|
||||
{
|
||||
return -ENOSYS;
|
||||
}
|
||||
|
||||
@@ -85,6 +85,7 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
|
||||
ram_flags |= fb->readonly ? RAM_READONLY_FD : 0;
|
||||
ram_flags |= fb->rom == ON_OFF_AUTO_ON ? RAM_READONLY : 0;
|
||||
ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
|
||||
ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0;
|
||||
ram_flags |= fb->is_pmem ? RAM_PMEM : 0;
|
||||
ram_flags |= RAM_NAMED_FILE;
|
||||
return memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), name,
|
||||
|
||||
@@ -55,6 +55,7 @@ memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
|
||||
name = host_memory_backend_get_name(backend);
|
||||
ram_flags = backend->share ? RAM_SHARED : 0;
|
||||
ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
|
||||
ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0;
|
||||
return memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), name,
|
||||
backend->size, ram_flags, fd, 0, errp);
|
||||
}
|
||||
|
||||
@@ -30,6 +30,7 @@ ram_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
|
||||
name = host_memory_backend_get_name(backend);
|
||||
ram_flags = backend->share ? RAM_SHARED : 0;
|
||||
ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
|
||||
ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0;
|
||||
return memory_region_init_ram_flags_nomigrate(&backend->mr, OBJECT(backend),
|
||||
name, backend->size,
|
||||
ram_flags, errp);
|
||||
|
||||
@@ -277,6 +277,7 @@ static void host_memory_backend_init(Object *obj)
|
||||
/* TODO: convert access to globals to compat properties */
|
||||
backend->merge = machine_mem_merge(machine);
|
||||
backend->dump = machine_dump_guest_core(machine);
|
||||
backend->guest_memfd = machine_require_guest_memfd(machine);
|
||||
backend->reserve = true;
|
||||
backend->prealloc_threads = machine->smp.cpus;
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
#CONFIG_QXL=n
|
||||
#CONFIG_SEV=n
|
||||
#CONFIG_SGA=n
|
||||
#CONFIG_TDX=n
|
||||
#CONFIG_TEST_DEVICES=n
|
||||
#CONFIG_TPM_CRB=n
|
||||
#CONFIG_TPM_TIS_ISA=n
|
||||
|
||||
@@ -38,6 +38,7 @@ Supported mechanisms
|
||||
Currently supported confidential guest mechanisms are:
|
||||
|
||||
* AMD Secure Encrypted Virtualization (SEV) (see :doc:`i386/amd-memory-encryption`)
|
||||
* Intel Trust Domain Extension (TDX) (see :doc:`i386/tdx`)
|
||||
* POWER Protected Execution Facility (PEF) (see :ref:`power-papr-protected-execution-facility-pef`)
|
||||
* s390x Protected Virtualization (PV) (see :doc:`s390x/protvirt`)
|
||||
|
||||
|
||||
143
docs/system/i386/tdx.rst
Normal file
143
docs/system/i386/tdx.rst
Normal file
@@ -0,0 +1,143 @@
|
||||
Intel Trusted Domain eXtension (TDX)
|
||||
====================================
|
||||
|
||||
Intel Trusted Domain eXtensions (TDX) refers to an Intel technology that extends
|
||||
Virtual Machine Extensions (VMX) and Multi-Key Total Memory Encryption (MKTME)
|
||||
with a new kind of virtual machine guest called a Trust Domain (TD). A TD runs
|
||||
in a CPU mode that is designed to protect the confidentiality of its memory
|
||||
contents and its CPU state from any other software, including the hosting
|
||||
Virtual Machine Monitor (VMM), unless explicitly shared by the TD itself.
|
||||
|
||||
Prerequisites
|
||||
-------------
|
||||
|
||||
To run TD, the physical machine needs to have TDX module loaded and initialized
|
||||
while KVM hypervisor has TDX support and has TDX enabled. If those requirements
|
||||
are met, the ``KVM_CAP_VM_TYPES`` will report the support of ``KVM_X86_TDX_VM``.
|
||||
|
||||
Trust Domain Virtual Firmware (TDVF)
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Trust Domain Virtual Firmware (TDVF) is required to provide TD services to boot
|
||||
TD Guest OS. TDVF needs to be copied to guest private memory and measured before
|
||||
the TD boots.
|
||||
|
||||
KVM vcpu ioctl ``KVM_MEMORY_MAPPING`` can be used to populates the TDVF content
|
||||
into its private memory.
|
||||
|
||||
Since TDX doesn't support readonly memslot, TDVF cannot be mapped as pflash
|
||||
device and it actually works as RAM. "-bios" option is chosen to load TDVF.
|
||||
|
||||
OVMF is the opensource firmware that implements the TDVF support. Thus the
|
||||
command line to specify and load TDVF is ``-bios OVMF.fd``
|
||||
|
||||
KVM private memory
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
TD's memory (RAM) needs to be able to be transformed between private and shared.
|
||||
Its BIOS (OVMF/TDVF) needs to be mapped as private as well. Thus QEMU needs to
|
||||
allocate private guest memfd for them via KVM's IOCTL (KVM_CREATE_GUEST_MEMFD),
|
||||
which requires KVM is newer enough that reports KVM_CAP_GUEST_MEMFD.
|
||||
|
||||
Feature Control
|
||||
---------------
|
||||
|
||||
Unlike non-TDX VM, the CPU features (enumerated by CPU or MSR) of a TD is not
|
||||
under full control of VMM. VMM can only configure part of features of a TD on
|
||||
``KVM_TDX_INIT_VM`` command of VM scope ``MEMORY_ENCRYPT_OP`` ioctl.
|
||||
|
||||
The configurable features have three types:
|
||||
|
||||
- Attributes:
|
||||
- PKS (bit 30) controls whether Supervisor Protection Keys is exposed to TD,
|
||||
which determines related CPUID bit and CR4 bit;
|
||||
- PERFMON (bit 63) controls whether PMU is exposed to TD.
|
||||
|
||||
- XSAVE related features (XFAM):
|
||||
XFAM is a 64b mask, which has the same format as XCR0 or IA32_XSS MSR. It
|
||||
determines the set of extended features available for use by the guest TD.
|
||||
|
||||
- CPUID features:
|
||||
Only some bits of some CPUID leaves are directly configurable by VMM.
|
||||
|
||||
What features can be configured is reported via TDX capabilities.
|
||||
|
||||
TDX capabilities
|
||||
~~~~~~~~~~~~~~~~
|
||||
|
||||
The VM scope ``MEMORY_ENCRYPT_OP`` ioctl provides command ``KVM_TDX_CAPABILITIES``
|
||||
to get the TDX capabilities from KVM. It returns a data structure of
|
||||
``struct kvm_tdx_capabilites``, which tells the supported configuration of
|
||||
attributes, XFAM and CPUIDs.
|
||||
|
||||
TD attestation
|
||||
--------------
|
||||
|
||||
In TD guest, the attestation process is used to verify the TDX guest
|
||||
trustworthiness to other entities before provisioning secrets to the guest.
|
||||
|
||||
TD attestation is initiated first by calling TDG.MR.REPORT inside TD to get the
|
||||
REPORT. Then the REPORT data needs to be converted into a remotely verifiable
|
||||
Quote by SGX Quoting Enclave (QE).
|
||||
|
||||
A host daemon, Quote Generation Service (QGS), provides the functionality of
|
||||
SGX GE. It provides a socket address, to which a TD guest can connect via
|
||||
"quote-generation-socket" property. On the request of <GETQUOTE> from TD guest,
|
||||
QEMU sends the TDREPORT to QGS via "quote-generation-socket" socket, and gets
|
||||
the returning Quoting and return it back to TD guest.
|
||||
|
||||
Though "quote-generation-socket" is optional for booting the TD guest, it's a
|
||||
must for supporting TD guest atteatation.
|
||||
|
||||
Launching a TD (TDX VM)
|
||||
-----------------------
|
||||
|
||||
To launch a TDX guest, below are new added and required:
|
||||
|
||||
.. parsed-literal::
|
||||
|
||||
|qemu_system_x86| \\
|
||||
-object tdx-guest,id=tdx0 \\
|
||||
-machine ...,kernel-irqchip=split,confidential-guest-support=tdx0 \\
|
||||
-bios OVMF.fd \\
|
||||
|
||||
If TD attestation support is wanted:
|
||||
|
||||
.. parsed-literal::
|
||||
|
||||
|qemu_system_x86| \\
|
||||
-object '{"qom-type":"tdx-guest","id":"tdx0","quote-generation-socket":{"type": "vsock", "cid":"1","port":"1234"}}' \\
|
||||
-machine ...,kernel-irqchip=split,confidential-guest-support=tdx0 \\
|
||||
-bios OVMF.fd \\
|
||||
|
||||
Debugging
|
||||
---------
|
||||
|
||||
Bit 0 of TD attributes, is DEBUG bit, which decides if the TD runs in off-TD
|
||||
debug mode. When in off-TD debug mode, TD's VCPU state and private memory are
|
||||
accessible via given SEAMCALLs. This requires KVM to expose APIs to invoke those
|
||||
SEAMCALLs and resonponding QEMU change.
|
||||
|
||||
It's targeted as future work.
|
||||
|
||||
restrictions
|
||||
------------
|
||||
|
||||
- kernel-irqchip must be split;
|
||||
|
||||
- No readonly support for private memory;
|
||||
|
||||
- No SMM support: SMM support requires manipulating the guset register states
|
||||
which is not allowed;
|
||||
|
||||
Live Migration
|
||||
--------------
|
||||
|
||||
TODO
|
||||
|
||||
References
|
||||
----------
|
||||
|
||||
- `TDX Homepage <https://www.intel.com/content/www/us/en/developer/articles/technical/intel-trust-domain-extensions.html>`__
|
||||
|
||||
- `SGX QE <https://github.com/intel/SGXDataCenterAttestationPrimitives/tree/master/QuoteGeneration>`__
|
||||
@@ -29,6 +29,7 @@ Architectural features
|
||||
i386/kvm-pv
|
||||
i386/sgx
|
||||
i386/amd-memory-encryption
|
||||
i386/tdx
|
||||
|
||||
OS requirements
|
||||
~~~~~~~~~~~~~~~
|
||||
|
||||
@@ -1192,6 +1192,11 @@ bool machine_mem_merge(MachineState *machine)
|
||||
return machine->mem_merge;
|
||||
}
|
||||
|
||||
bool machine_require_guest_memfd(MachineState *machine)
|
||||
{
|
||||
return machine->require_guest_memfd;
|
||||
}
|
||||
|
||||
static char *cpu_slot_to_string(const CPUArchId *cpu)
|
||||
{
|
||||
GString *s = g_string_new(NULL);
|
||||
|
||||
@@ -10,6 +10,11 @@ config SGX
|
||||
bool
|
||||
depends on KVM
|
||||
|
||||
config TDX
|
||||
bool
|
||||
select X86_FW_OVMF
|
||||
depends on KVM
|
||||
|
||||
config PC
|
||||
bool
|
||||
imply APPLESMC
|
||||
@@ -26,6 +31,7 @@ config PC
|
||||
imply QXL
|
||||
imply SEV
|
||||
imply SGX
|
||||
imply TDX
|
||||
imply TEST_DEVICES
|
||||
imply TPM_CRB
|
||||
imply TPM_TIS_ISA
|
||||
|
||||
@@ -964,7 +964,8 @@ static void build_dbg_aml(Aml *table)
|
||||
aml_append(table, scope);
|
||||
}
|
||||
|
||||
static Aml *build_link_dev(const char *name, uint8_t uid, Aml *reg)
|
||||
static Aml *build_link_dev(const char *name, uint8_t uid, Aml *reg,
|
||||
bool level_trigger_unsupported)
|
||||
{
|
||||
Aml *dev;
|
||||
Aml *crs;
|
||||
@@ -976,7 +977,10 @@ static Aml *build_link_dev(const char *name, uint8_t uid, Aml *reg)
|
||||
aml_append(dev, aml_name_decl("_UID", aml_int(uid)));
|
||||
|
||||
crs = aml_resource_template();
|
||||
aml_append(crs, aml_interrupt(AML_CONSUMER, AML_LEVEL, AML_ACTIVE_HIGH,
|
||||
aml_append(crs, aml_interrupt(AML_CONSUMER,
|
||||
level_trigger_unsupported ?
|
||||
AML_EDGE : AML_LEVEL,
|
||||
AML_ACTIVE_HIGH,
|
||||
AML_SHARED, irqs, ARRAY_SIZE(irqs)));
|
||||
aml_append(dev, aml_name_decl("_PRS", crs));
|
||||
|
||||
@@ -1000,7 +1004,8 @@ static Aml *build_link_dev(const char *name, uint8_t uid, Aml *reg)
|
||||
return dev;
|
||||
}
|
||||
|
||||
static Aml *build_gsi_link_dev(const char *name, uint8_t uid, uint8_t gsi)
|
||||
static Aml *build_gsi_link_dev(const char *name, uint8_t uid,
|
||||
uint8_t gsi, bool level_trigger_unsupported)
|
||||
{
|
||||
Aml *dev;
|
||||
Aml *crs;
|
||||
@@ -1013,7 +1018,10 @@ static Aml *build_gsi_link_dev(const char *name, uint8_t uid, uint8_t gsi)
|
||||
|
||||
crs = aml_resource_template();
|
||||
irqs = gsi;
|
||||
aml_append(crs, aml_interrupt(AML_CONSUMER, AML_LEVEL, AML_ACTIVE_HIGH,
|
||||
aml_append(crs, aml_interrupt(AML_CONSUMER,
|
||||
level_trigger_unsupported ?
|
||||
AML_EDGE : AML_LEVEL,
|
||||
AML_ACTIVE_HIGH,
|
||||
AML_SHARED, &irqs, 1));
|
||||
aml_append(dev, aml_name_decl("_PRS", crs));
|
||||
|
||||
@@ -1032,7 +1040,7 @@ static Aml *build_gsi_link_dev(const char *name, uint8_t uid, uint8_t gsi)
|
||||
}
|
||||
|
||||
/* _CRS method - get current settings */
|
||||
static Aml *build_iqcr_method(bool is_piix4)
|
||||
static Aml *build_iqcr_method(bool is_piix4, bool level_trigger_unsupported)
|
||||
{
|
||||
Aml *if_ctx;
|
||||
uint32_t irqs;
|
||||
@@ -1040,7 +1048,9 @@ static Aml *build_iqcr_method(bool is_piix4)
|
||||
Aml *crs = aml_resource_template();
|
||||
|
||||
irqs = 0;
|
||||
aml_append(crs, aml_interrupt(AML_CONSUMER, AML_LEVEL,
|
||||
aml_append(crs, aml_interrupt(AML_CONSUMER,
|
||||
level_trigger_unsupported ?
|
||||
AML_EDGE : AML_LEVEL,
|
||||
AML_ACTIVE_HIGH, AML_SHARED, &irqs, 1));
|
||||
aml_append(method, aml_name_decl("PRR0", crs));
|
||||
|
||||
@@ -1074,7 +1084,7 @@ static Aml *build_irq_status_method(void)
|
||||
return method;
|
||||
}
|
||||
|
||||
static void build_piix4_pci0_int(Aml *table)
|
||||
static void build_piix4_pci0_int(Aml *table, bool level_trigger_unsupported)
|
||||
{
|
||||
Aml *dev;
|
||||
Aml *crs;
|
||||
@@ -1087,12 +1097,16 @@ static void build_piix4_pci0_int(Aml *table)
|
||||
aml_append(sb_scope, pci0_scope);
|
||||
|
||||
aml_append(sb_scope, build_irq_status_method());
|
||||
aml_append(sb_scope, build_iqcr_method(true));
|
||||
aml_append(sb_scope, build_iqcr_method(true, level_trigger_unsupported));
|
||||
|
||||
aml_append(sb_scope, build_link_dev("LNKA", 0, aml_name("PRQ0")));
|
||||
aml_append(sb_scope, build_link_dev("LNKB", 1, aml_name("PRQ1")));
|
||||
aml_append(sb_scope, build_link_dev("LNKC", 2, aml_name("PRQ2")));
|
||||
aml_append(sb_scope, build_link_dev("LNKD", 3, aml_name("PRQ3")));
|
||||
aml_append(sb_scope, build_link_dev("LNKA", 0, aml_name("PRQ0"),
|
||||
level_trigger_unsupported));
|
||||
aml_append(sb_scope, build_link_dev("LNKB", 1, aml_name("PRQ1"),
|
||||
level_trigger_unsupported));
|
||||
aml_append(sb_scope, build_link_dev("LNKC", 2, aml_name("PRQ2"),
|
||||
level_trigger_unsupported));
|
||||
aml_append(sb_scope, build_link_dev("LNKD", 3, aml_name("PRQ3"),
|
||||
level_trigger_unsupported));
|
||||
|
||||
dev = aml_device("LNKS");
|
||||
{
|
||||
@@ -1101,7 +1115,9 @@ static void build_piix4_pci0_int(Aml *table)
|
||||
|
||||
crs = aml_resource_template();
|
||||
irqs = 9;
|
||||
aml_append(crs, aml_interrupt(AML_CONSUMER, AML_LEVEL,
|
||||
aml_append(crs, aml_interrupt(AML_CONSUMER,
|
||||
level_trigger_unsupported ?
|
||||
AML_EDGE : AML_LEVEL,
|
||||
AML_ACTIVE_HIGH, AML_SHARED,
|
||||
&irqs, 1));
|
||||
aml_append(dev, aml_name_decl("_PRS", crs));
|
||||
@@ -1187,7 +1203,7 @@ static Aml *build_q35_routing_table(const char *str)
|
||||
return pkg;
|
||||
}
|
||||
|
||||
static void build_q35_pci0_int(Aml *table)
|
||||
static void build_q35_pci0_int(Aml *table, bool level_trigger_unsupported)
|
||||
{
|
||||
Aml *method;
|
||||
Aml *sb_scope = aml_scope("_SB");
|
||||
@@ -1226,25 +1242,41 @@ static void build_q35_pci0_int(Aml *table)
|
||||
aml_append(sb_scope, pci0_scope);
|
||||
|
||||
aml_append(sb_scope, build_irq_status_method());
|
||||
aml_append(sb_scope, build_iqcr_method(false));
|
||||
aml_append(sb_scope, build_iqcr_method(false, level_trigger_unsupported));
|
||||
|
||||
aml_append(sb_scope, build_link_dev("LNKA", 0, aml_name("PRQA")));
|
||||
aml_append(sb_scope, build_link_dev("LNKB", 1, aml_name("PRQB")));
|
||||
aml_append(sb_scope, build_link_dev("LNKC", 2, aml_name("PRQC")));
|
||||
aml_append(sb_scope, build_link_dev("LNKD", 3, aml_name("PRQD")));
|
||||
aml_append(sb_scope, build_link_dev("LNKE", 4, aml_name("PRQE")));
|
||||
aml_append(sb_scope, build_link_dev("LNKF", 5, aml_name("PRQF")));
|
||||
aml_append(sb_scope, build_link_dev("LNKG", 6, aml_name("PRQG")));
|
||||
aml_append(sb_scope, build_link_dev("LNKH", 7, aml_name("PRQH")));
|
||||
aml_append(sb_scope, build_link_dev("LNKA", 0, aml_name("PRQA"),
|
||||
level_trigger_unsupported));
|
||||
aml_append(sb_scope, build_link_dev("LNKB", 1, aml_name("PRQB"),
|
||||
level_trigger_unsupported));
|
||||
aml_append(sb_scope, build_link_dev("LNKC", 2, aml_name("PRQC"),
|
||||
level_trigger_unsupported));
|
||||
aml_append(sb_scope, build_link_dev("LNKD", 3, aml_name("PRQD"),
|
||||
level_trigger_unsupported));
|
||||
aml_append(sb_scope, build_link_dev("LNKE", 4, aml_name("PRQE"),
|
||||
level_trigger_unsupported));
|
||||
aml_append(sb_scope, build_link_dev("LNKF", 5, aml_name("PRQF"),
|
||||
level_trigger_unsupported));
|
||||
aml_append(sb_scope, build_link_dev("LNKG", 6, aml_name("PRQG"),
|
||||
level_trigger_unsupported));
|
||||
aml_append(sb_scope, build_link_dev("LNKH", 7, aml_name("PRQH"),
|
||||
level_trigger_unsupported));
|
||||
|
||||
aml_append(sb_scope, build_gsi_link_dev("GSIA", 0x10, 0x10));
|
||||
aml_append(sb_scope, build_gsi_link_dev("GSIB", 0x11, 0x11));
|
||||
aml_append(sb_scope, build_gsi_link_dev("GSIC", 0x12, 0x12));
|
||||
aml_append(sb_scope, build_gsi_link_dev("GSID", 0x13, 0x13));
|
||||
aml_append(sb_scope, build_gsi_link_dev("GSIE", 0x14, 0x14));
|
||||
aml_append(sb_scope, build_gsi_link_dev("GSIF", 0x15, 0x15));
|
||||
aml_append(sb_scope, build_gsi_link_dev("GSIG", 0x16, 0x16));
|
||||
aml_append(sb_scope, build_gsi_link_dev("GSIH", 0x17, 0x17));
|
||||
aml_append(sb_scope, build_gsi_link_dev("GSIA", 0x10, 0x10,
|
||||
level_trigger_unsupported));
|
||||
aml_append(sb_scope, build_gsi_link_dev("GSIB", 0x11, 0x11,
|
||||
level_trigger_unsupported));
|
||||
aml_append(sb_scope, build_gsi_link_dev("GSIC", 0x12, 0x12,
|
||||
level_trigger_unsupported));
|
||||
aml_append(sb_scope, build_gsi_link_dev("GSID", 0x13, 0x13,
|
||||
level_trigger_unsupported));
|
||||
aml_append(sb_scope, build_gsi_link_dev("GSIE", 0x14, 0x14,
|
||||
level_trigger_unsupported));
|
||||
aml_append(sb_scope, build_gsi_link_dev("GSIF", 0x15, 0x15,
|
||||
level_trigger_unsupported));
|
||||
aml_append(sb_scope, build_gsi_link_dev("GSIG", 0x16, 0x16,
|
||||
level_trigger_unsupported));
|
||||
aml_append(sb_scope, build_gsi_link_dev("GSIH", 0x17, 0x17,
|
||||
level_trigger_unsupported));
|
||||
|
||||
aml_append(table, sb_scope);
|
||||
}
|
||||
@@ -1426,6 +1458,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
|
||||
PCMachineState *pcms = PC_MACHINE(machine);
|
||||
PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(machine);
|
||||
X86MachineState *x86ms = X86_MACHINE(machine);
|
||||
bool level_trigger_unsupported = x86ms->eoi_intercept_unsupported;
|
||||
AcpiMcfgInfo mcfg;
|
||||
bool mcfg_valid = !!acpi_get_mcfg(&mcfg);
|
||||
uint32_t nr_mem = machine->ram_slots;
|
||||
@@ -1458,7 +1491,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
|
||||
if (pm->pcihp_bridge_en || pm->pcihp_root_en) {
|
||||
build_x86_acpi_pci_hotplug(dsdt, pm->pcihp_io_base);
|
||||
}
|
||||
build_piix4_pci0_int(dsdt);
|
||||
build_piix4_pci0_int(dsdt, level_trigger_unsupported);
|
||||
} else if (q35) {
|
||||
sb_scope = aml_scope("_SB");
|
||||
dev = aml_device("PCI0");
|
||||
@@ -1502,7 +1535,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
|
||||
if (pm->pcihp_bridge_en) {
|
||||
build_x86_acpi_pci_hotplug(dsdt, pm->pcihp_io_base);
|
||||
}
|
||||
build_q35_pci0_int(dsdt);
|
||||
build_q35_pci0_int(dsdt, level_trigger_unsupported);
|
||||
}
|
||||
|
||||
if (misc->has_hpet) {
|
||||
|
||||
@@ -103,6 +103,7 @@ void acpi_build_madt(GArray *table_data, BIOSLinker *linker,
|
||||
const CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(MACHINE(x86ms));
|
||||
AcpiTable table = { .sig = "APIC", .rev = 3, .oem_id = oem_id,
|
||||
.oem_table_id = oem_table_id };
|
||||
bool level_trigger_unsupported = x86ms->eoi_intercept_unsupported;
|
||||
|
||||
acpi_table_begin(&table, table_data);
|
||||
/* Local APIC Address */
|
||||
@@ -122,18 +123,42 @@ void acpi_build_madt(GArray *table_data, BIOSLinker *linker,
|
||||
IO_APIC_SECONDARY_ADDRESS, IO_APIC_SECONDARY_IRQBASE);
|
||||
}
|
||||
|
||||
if (x86mc->apic_xrupt_override) {
|
||||
build_xrupt_override(table_data, 0, 2,
|
||||
0 /* Flags: Conforms to the specifications of the bus */);
|
||||
}
|
||||
|
||||
for (i = 1; i < 16; i++) {
|
||||
if (!(x86ms->pci_irq_mask & (1 << i))) {
|
||||
/* No need for a INT source override structure. */
|
||||
continue;
|
||||
if (level_trigger_unsupported) {
|
||||
/* Force edge trigger */
|
||||
if (x86mc->apic_xrupt_override) {
|
||||
build_xrupt_override(table_data, 0, 2,
|
||||
/* Flags: active high, edge triggered */
|
||||
1 | (1 << 2));
|
||||
}
|
||||
|
||||
for (i = x86mc->apic_xrupt_override ? 1 : 0; i < 16; i++) {
|
||||
build_xrupt_override(table_data, i, i,
|
||||
/* Flags: active high, edge triggered */
|
||||
1 | (1 << 2));
|
||||
}
|
||||
|
||||
if (x86ms->ioapic2) {
|
||||
for (i = 0; i < 16; i++) {
|
||||
build_xrupt_override(table_data, IO_APIC_SECONDARY_IRQBASE + i,
|
||||
IO_APIC_SECONDARY_IRQBASE + i,
|
||||
/* Flags: active high, edge triggered */
|
||||
1 | (1 << 2));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (x86mc->apic_xrupt_override) {
|
||||
build_xrupt_override(table_data, 0, 2,
|
||||
0 /* Flags: Conforms to the specifications of the bus */);
|
||||
}
|
||||
|
||||
for (i = 1; i < 16; i++) {
|
||||
if (!(x86ms->pci_irq_mask & (1 << i))) {
|
||||
/* No need for a INT source override structure. */
|
||||
continue;
|
||||
}
|
||||
build_xrupt_override(table_data, i, i,
|
||||
0xd /* Flags: Active high, Level Triggered */);
|
||||
}
|
||||
build_xrupt_override(table_data, i, i,
|
||||
0xd /* Flags: Active high, Level Triggered */);
|
||||
}
|
||||
|
||||
if (x2apic_mode) {
|
||||
|
||||
@@ -28,6 +28,7 @@ i386_ss.add(when: 'CONFIG_PC', if_true: files(
|
||||
'port92.c'))
|
||||
i386_ss.add(when: 'CONFIG_X86_FW_OVMF', if_true: files('pc_sysfw_ovmf.c'),
|
||||
if_false: files('pc_sysfw_ovmf-stubs.c'))
|
||||
i386_ss.add(when: 'CONFIG_TDX', if_true: files('tdvf.c', 'tdvf-hob.c'))
|
||||
|
||||
subdir('kvm')
|
||||
subdir('xen')
|
||||
|
||||
21
hw/i386/pc.c
21
hw/i386/pc.c
@@ -43,6 +43,7 @@
|
||||
#include "sysemu/xen.h"
|
||||
#include "sysemu/reset.h"
|
||||
#include "kvm/kvm_i386.h"
|
||||
#include "kvm/tdx.h"
|
||||
#include "hw/xen/xen.h"
|
||||
#include "qapi/qmp/qlist.h"
|
||||
#include "qemu/error-report.h"
|
||||
@@ -1028,16 +1029,18 @@ void pc_memory_init(PCMachineState *pcms,
|
||||
/* Initialize PC system firmware */
|
||||
pc_system_firmware_init(pcms, rom_memory);
|
||||
|
||||
option_rom_mr = g_malloc(sizeof(*option_rom_mr));
|
||||
memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
|
||||
&error_fatal);
|
||||
if (pcmc->pci_enabled) {
|
||||
memory_region_set_readonly(option_rom_mr, true);
|
||||
if (!is_tdx_vm()) {
|
||||
option_rom_mr = g_malloc(sizeof(*option_rom_mr));
|
||||
memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
|
||||
&error_fatal);
|
||||
if (pcmc->pci_enabled) {
|
||||
memory_region_set_readonly(option_rom_mr, true);
|
||||
}
|
||||
memory_region_add_subregion_overlap(rom_memory,
|
||||
PC_ROM_MIN_VGA,
|
||||
option_rom_mr,
|
||||
1);
|
||||
}
|
||||
memory_region_add_subregion_overlap(rom_memory,
|
||||
PC_ROM_MIN_VGA,
|
||||
option_rom_mr,
|
||||
1);
|
||||
|
||||
fw_cfg = fw_cfg_arch_create(machine,
|
||||
x86ms->boot_cpus, x86ms->apic_id_limit);
|
||||
|
||||
@@ -212,6 +212,8 @@ static void pc_q35_init(MachineState *machine)
|
||||
x86ms->above_4g_mem_size, NULL);
|
||||
object_property_set_bool(phb, PCI_HOST_BYPASS_IOMMU,
|
||||
pcms->default_bus_bypass_iommu, NULL);
|
||||
object_property_set_bool(phb, PCI_HOST_PROP_SMM_RANGES,
|
||||
x86_machine_is_smm_enabled(x86ms), NULL);
|
||||
sysbus_realize_and_unref(SYS_BUS_DEVICE(phb), &error_fatal);
|
||||
|
||||
/* pci */
|
||||
|
||||
@@ -37,6 +37,7 @@
|
||||
#include "hw/block/flash.h"
|
||||
#include "sysemu/kvm.h"
|
||||
#include "sev.h"
|
||||
#include "kvm/tdx.h"
|
||||
|
||||
#define FLASH_SECTOR_SIZE 4096
|
||||
|
||||
@@ -254,5 +255,11 @@ void x86_firmware_configure(void *ptr, int size)
|
||||
}
|
||||
|
||||
sev_encrypt_flash(ptr, size, &error_fatal);
|
||||
} else if (is_tdx_vm()) {
|
||||
ret = tdx_parse_tdvf(ptr, size);
|
||||
if (ret) {
|
||||
error_report("failed to parse TDVF for TDX VM");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
147
hw/i386/tdvf-hob.c
Normal file
147
hw/i386/tdvf-hob.c
Normal file
@@ -0,0 +1,147 @@
|
||||
/*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
* Copyright (c) 2020 Intel Corporation
|
||||
* Author: Isaku Yamahata <isaku.yamahata at gmail.com>
|
||||
* <isaku.yamahata at intel.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with this program; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "qemu/log.h"
|
||||
#include "qemu/error-report.h"
|
||||
#include "e820_memory_layout.h"
|
||||
#include "hw/i386/pc.h"
|
||||
#include "hw/i386/x86.h"
|
||||
#include "hw/pci/pcie_host.h"
|
||||
#include "sysemu/kvm.h"
|
||||
#include "standard-headers/uefi/uefi.h"
|
||||
#include "tdvf-hob.h"
|
||||
|
||||
typedef struct TdvfHob {
|
||||
hwaddr hob_addr;
|
||||
void *ptr;
|
||||
int size;
|
||||
|
||||
/* working area */
|
||||
void *current;
|
||||
void *end;
|
||||
} TdvfHob;
|
||||
|
||||
static uint64_t tdvf_current_guest_addr(const TdvfHob *hob)
|
||||
{
|
||||
return hob->hob_addr + (hob->current - hob->ptr);
|
||||
}
|
||||
|
||||
static void tdvf_align(TdvfHob *hob, size_t align)
|
||||
{
|
||||
hob->current = QEMU_ALIGN_PTR_UP(hob->current, align);
|
||||
}
|
||||
|
||||
static void *tdvf_get_area(TdvfHob *hob, uint64_t size)
|
||||
{
|
||||
void *ret;
|
||||
|
||||
if (hob->current + size > hob->end) {
|
||||
error_report("TD_HOB overrun, size = 0x%" PRIx64, size);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
ret = hob->current;
|
||||
hob->current += size;
|
||||
tdvf_align(hob, 8);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void tdvf_hob_add_memory_resources(TdxGuest *tdx, TdvfHob *hob)
|
||||
{
|
||||
EFI_HOB_RESOURCE_DESCRIPTOR *region;
|
||||
EFI_RESOURCE_ATTRIBUTE_TYPE attr;
|
||||
EFI_RESOURCE_TYPE resource_type;
|
||||
|
||||
TdxRamEntry *e;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < tdx->nr_ram_entries; i++) {
|
||||
e = &tdx->ram_entries[i];
|
||||
|
||||
if (e->type == TDX_RAM_UNACCEPTED) {
|
||||
resource_type = EFI_RESOURCE_MEMORY_UNACCEPTED;
|
||||
attr = EFI_RESOURCE_ATTRIBUTE_TDVF_UNACCEPTED;
|
||||
} else if (e->type == TDX_RAM_ADDED){
|
||||
resource_type = EFI_RESOURCE_SYSTEM_MEMORY;
|
||||
attr = EFI_RESOURCE_ATTRIBUTE_TDVF_PRIVATE;
|
||||
} else {
|
||||
error_report("unknown TDX_RAM_ENTRY type %d", e->type);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
region = tdvf_get_area(hob, sizeof(*region));
|
||||
*region = (EFI_HOB_RESOURCE_DESCRIPTOR) {
|
||||
.Header = {
|
||||
.HobType = EFI_HOB_TYPE_RESOURCE_DESCRIPTOR,
|
||||
.HobLength = cpu_to_le16(sizeof(*region)),
|
||||
.Reserved = cpu_to_le32(0),
|
||||
},
|
||||
.Owner = EFI_HOB_OWNER_ZERO,
|
||||
.ResourceType = cpu_to_le32(resource_type),
|
||||
.ResourceAttribute = cpu_to_le32(attr),
|
||||
.PhysicalStart = cpu_to_le64(e->address),
|
||||
.ResourceLength = cpu_to_le64(e->length),
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
void tdvf_hob_create(TdxGuest *tdx, TdxFirmwareEntry *td_hob)
|
||||
{
|
||||
TdvfHob hob = {
|
||||
.hob_addr = td_hob->address,
|
||||
.size = td_hob->size,
|
||||
.ptr = td_hob->mem_ptr,
|
||||
|
||||
.current = td_hob->mem_ptr,
|
||||
.end = td_hob->mem_ptr + td_hob->size,
|
||||
};
|
||||
|
||||
EFI_HOB_GENERIC_HEADER *last_hob;
|
||||
EFI_HOB_HANDOFF_INFO_TABLE *hit;
|
||||
|
||||
/* Note, Efi{Free}Memory{Bottom,Top} are ignored, leave 'em zeroed. */
|
||||
hit = tdvf_get_area(&hob, sizeof(*hit));
|
||||
*hit = (EFI_HOB_HANDOFF_INFO_TABLE) {
|
||||
.Header = {
|
||||
.HobType = EFI_HOB_TYPE_HANDOFF,
|
||||
.HobLength = cpu_to_le16(sizeof(*hit)),
|
||||
.Reserved = cpu_to_le32(0),
|
||||
},
|
||||
.Version = cpu_to_le32(EFI_HOB_HANDOFF_TABLE_VERSION),
|
||||
.BootMode = cpu_to_le32(0),
|
||||
.EfiMemoryTop = cpu_to_le64(0),
|
||||
.EfiMemoryBottom = cpu_to_le64(0),
|
||||
.EfiFreeMemoryTop = cpu_to_le64(0),
|
||||
.EfiFreeMemoryBottom = cpu_to_le64(0),
|
||||
.EfiEndOfHobList = cpu_to_le64(0), /* initialized later */
|
||||
};
|
||||
|
||||
tdvf_hob_add_memory_resources(tdx, &hob);
|
||||
|
||||
last_hob = tdvf_get_area(&hob, sizeof(*last_hob));
|
||||
*last_hob = (EFI_HOB_GENERIC_HEADER) {
|
||||
.HobType = EFI_HOB_TYPE_END_OF_HOB_LIST,
|
||||
.HobLength = cpu_to_le16(sizeof(*last_hob)),
|
||||
.Reserved = cpu_to_le32(0),
|
||||
};
|
||||
hit->EfiEndOfHobList = tdvf_current_guest_addr(&hob);
|
||||
}
|
||||
24
hw/i386/tdvf-hob.h
Normal file
24
hw/i386/tdvf-hob.h
Normal file
@@ -0,0 +1,24 @@
|
||||
#ifndef HW_I386_TD_HOB_H
|
||||
#define HW_I386_TD_HOB_H
|
||||
|
||||
#include "hw/i386/tdvf.h"
|
||||
#include "target/i386/kvm/tdx.h"
|
||||
|
||||
void tdvf_hob_create(TdxGuest *tdx, TdxFirmwareEntry *td_hob);
|
||||
|
||||
#define EFI_RESOURCE_ATTRIBUTE_TDVF_PRIVATE \
|
||||
(EFI_RESOURCE_ATTRIBUTE_PRESENT | \
|
||||
EFI_RESOURCE_ATTRIBUTE_INITIALIZED | \
|
||||
EFI_RESOURCE_ATTRIBUTE_TESTED)
|
||||
|
||||
#define EFI_RESOURCE_ATTRIBUTE_TDVF_UNACCEPTED \
|
||||
(EFI_RESOURCE_ATTRIBUTE_PRESENT | \
|
||||
EFI_RESOURCE_ATTRIBUTE_INITIALIZED | \
|
||||
EFI_RESOURCE_ATTRIBUTE_TESTED)
|
||||
|
||||
#define EFI_RESOURCE_ATTRIBUTE_TDVF_MMIO \
|
||||
(EFI_RESOURCE_ATTRIBUTE_PRESENT | \
|
||||
EFI_RESOURCE_ATTRIBUTE_INITIALIZED | \
|
||||
EFI_RESOURCE_ATTRIBUTE_UNCACHEABLE)
|
||||
|
||||
#endif
|
||||
200
hw/i386/tdvf.c
Normal file
200
hw/i386/tdvf.c
Normal file
@@ -0,0 +1,200 @@
|
||||
/*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
* Copyright (c) 2020 Intel Corporation
|
||||
* Author: Isaku Yamahata <isaku.yamahata at gmail.com>
|
||||
* <isaku.yamahata at intel.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with this program; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "qemu/error-report.h"
|
||||
|
||||
#include "hw/i386/pc.h"
|
||||
#include "hw/i386/tdvf.h"
|
||||
#include "sysemu/kvm.h"
|
||||
|
||||
#define TDX_METADATA_OFFSET_GUID "e47a6535-984a-4798-865e-4685a7bf8ec2"
|
||||
#define TDX_METADATA_VERSION 1
|
||||
#define TDVF_SIGNATURE 0x46564454 /* TDVF as little endian */
|
||||
|
||||
typedef struct {
|
||||
uint32_t DataOffset;
|
||||
uint32_t RawDataSize;
|
||||
uint64_t MemoryAddress;
|
||||
uint64_t MemoryDataSize;
|
||||
uint32_t Type;
|
||||
uint32_t Attributes;
|
||||
} TdvfSectionEntry;
|
||||
|
||||
typedef struct {
|
||||
uint32_t Signature;
|
||||
uint32_t Length;
|
||||
uint32_t Version;
|
||||
uint32_t NumberOfSectionEntries;
|
||||
TdvfSectionEntry SectionEntries[];
|
||||
} TdvfMetadata;
|
||||
|
||||
struct tdx_metadata_offset {
|
||||
uint32_t offset;
|
||||
};
|
||||
|
||||
static TdvfMetadata *tdvf_get_metadata(void *flash_ptr, int size)
|
||||
{
|
||||
TdvfMetadata *metadata;
|
||||
uint32_t offset = 0;
|
||||
uint8_t *data;
|
||||
|
||||
if ((uint32_t) size != size) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (pc_system_ovmf_table_find(TDX_METADATA_OFFSET_GUID, &data, NULL)) {
|
||||
offset = size - le32_to_cpu(((struct tdx_metadata_offset *)data)->offset);
|
||||
|
||||
if (offset + sizeof(*metadata) > size) {
|
||||
return NULL;
|
||||
}
|
||||
} else {
|
||||
error_report("Cannot find TDX_METADATA_OFFSET_GUID");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
metadata = flash_ptr + offset;
|
||||
|
||||
/* Finally, verify the signature to determine if this is a TDVF image. */
|
||||
metadata->Signature = le32_to_cpu(metadata->Signature);
|
||||
if (metadata->Signature != TDVF_SIGNATURE) {
|
||||
error_report("Invalid TDVF signature in metadata!");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Sanity check that the TDVF doesn't overlap its own metadata. */
|
||||
metadata->Length = le32_to_cpu(metadata->Length);
|
||||
if (offset + metadata->Length > size) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Only version 1 is supported/defined. */
|
||||
metadata->Version = le32_to_cpu(metadata->Version);
|
||||
if (metadata->Version != TDX_METADATA_VERSION) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return metadata;
|
||||
}
|
||||
|
||||
static int tdvf_parse_and_check_section_entry(const TdvfSectionEntry *src,
|
||||
TdxFirmwareEntry *entry)
|
||||
{
|
||||
entry->data_offset = le32_to_cpu(src->DataOffset);
|
||||
entry->data_len = le32_to_cpu(src->RawDataSize);
|
||||
entry->address = le64_to_cpu(src->MemoryAddress);
|
||||
entry->size = le64_to_cpu(src->MemoryDataSize);
|
||||
entry->type = le32_to_cpu(src->Type);
|
||||
entry->attributes = le32_to_cpu(src->Attributes);
|
||||
|
||||
/* sanity check */
|
||||
if (entry->size < entry->data_len) {
|
||||
error_report("Broken metadata RawDataSize 0x%x MemoryDataSize 0x%lx",
|
||||
entry->data_len, entry->size);
|
||||
return -1;
|
||||
}
|
||||
if (!QEMU_IS_ALIGNED(entry->address, TARGET_PAGE_SIZE)) {
|
||||
error_report("MemoryAddress 0x%lx not page aligned", entry->address);
|
||||
return -1;
|
||||
}
|
||||
if (!QEMU_IS_ALIGNED(entry->size, TARGET_PAGE_SIZE)) {
|
||||
error_report("MemoryDataSize 0x%lx not page aligned", entry->size);
|
||||
return -1;
|
||||
}
|
||||
|
||||
switch (entry->type) {
|
||||
case TDVF_SECTION_TYPE_BFV:
|
||||
case TDVF_SECTION_TYPE_CFV:
|
||||
/* The sections that must be copied from firmware image to TD memory */
|
||||
if (entry->data_len == 0) {
|
||||
error_report("%d section with RawDataSize == 0", entry->type);
|
||||
return -1;
|
||||
}
|
||||
break;
|
||||
case TDVF_SECTION_TYPE_TD_HOB:
|
||||
case TDVF_SECTION_TYPE_TEMP_MEM:
|
||||
/* The sections that no need to be copied from firmware image */
|
||||
if (entry->data_len != 0) {
|
||||
error_report("%d section with RawDataSize 0x%x != 0",
|
||||
entry->type, entry->data_len);
|
||||
return -1;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
error_report("TDVF contains unsupported section type %d", entry->type);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int tdvf_parse_metadata(TdxFirmware *fw, void *flash_ptr, int size)
|
||||
{
|
||||
TdvfSectionEntry *sections;
|
||||
TdvfMetadata *metadata;
|
||||
ssize_t entries_size;
|
||||
uint32_t len, i;
|
||||
|
||||
metadata = tdvf_get_metadata(flash_ptr, size);
|
||||
if (!metadata) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
//load and parse metadata entries
|
||||
fw->nr_entries = le32_to_cpu(metadata->NumberOfSectionEntries);
|
||||
if (fw->nr_entries < 2) {
|
||||
error_report("Invalid number of fw entries (%u) in TDVF", fw->nr_entries);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
len = le32_to_cpu(metadata->Length);
|
||||
entries_size = fw->nr_entries * sizeof(TdvfSectionEntry);
|
||||
if (len != sizeof(*metadata) + entries_size) {
|
||||
error_report("TDVF metadata len (0x%x) mismatch, expected (0x%x)",
|
||||
len, (uint32_t)(sizeof(*metadata) + entries_size));
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
fw->entries = g_new(TdxFirmwareEntry, fw->nr_entries);
|
||||
sections = g_new(TdvfSectionEntry, fw->nr_entries);
|
||||
|
||||
if (!memcpy(sections, (void *)metadata + sizeof(*metadata), entries_size)) {
|
||||
error_report("Failed to read TDVF section entries");
|
||||
goto err;
|
||||
}
|
||||
|
||||
for (i = 0; i < fw->nr_entries; i++) {
|
||||
if (tdvf_parse_and_check_section_entry(§ions[i], &fw->entries[i])) {
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
g_free(sections);
|
||||
|
||||
fw->mem_ptr = flash_ptr;
|
||||
return 0;
|
||||
|
||||
err:
|
||||
g_free(sections);
|
||||
fw->entries = 0;
|
||||
g_free(fw->entries);
|
||||
return -EINVAL;
|
||||
}
|
||||
@@ -47,6 +47,7 @@
|
||||
#include "hw/intc/i8259.h"
|
||||
#include "hw/rtc/mc146818rtc.h"
|
||||
#include "target/i386/sev.h"
|
||||
#include "kvm/tdx.h"
|
||||
|
||||
#include "hw/acpi/cpu_hotplug.h"
|
||||
#include "hw/irq.h"
|
||||
@@ -1157,9 +1158,17 @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware,
|
||||
(bios_size % 65536) != 0) {
|
||||
goto bios_error;
|
||||
}
|
||||
|
||||
bios = g_malloc(sizeof(*bios));
|
||||
memory_region_init_ram(bios, NULL, "pc.bios", bios_size, &error_fatal);
|
||||
if (sev_enabled()) {
|
||||
if (is_tdx_vm()) {
|
||||
memory_region_init_ram_guest_memfd(bios, NULL, "pc.bios", bios_size,
|
||||
&error_fatal);
|
||||
tdx_set_tdvf_region(bios);
|
||||
} else {
|
||||
memory_region_init_ram(bios, NULL, "pc.bios", bios_size, &error_fatal);
|
||||
}
|
||||
|
||||
if (sev_enabled() || is_tdx_vm()) {
|
||||
/*
|
||||
* The concept of a "reset" simply doesn't exist for
|
||||
* confidential computing guests, we have to destroy and
|
||||
@@ -1181,17 +1190,20 @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware,
|
||||
}
|
||||
g_free(filename);
|
||||
|
||||
/* map the last 128KB of the BIOS in ISA space */
|
||||
isa_bios_size = MIN(bios_size, 128 * KiB);
|
||||
isa_bios = g_malloc(sizeof(*isa_bios));
|
||||
memory_region_init_alias(isa_bios, NULL, "isa-bios", bios,
|
||||
bios_size - isa_bios_size, isa_bios_size);
|
||||
memory_region_add_subregion_overlap(rom_memory,
|
||||
0x100000 - isa_bios_size,
|
||||
isa_bios,
|
||||
1);
|
||||
if (!isapc_ram_fw) {
|
||||
memory_region_set_readonly(isa_bios, true);
|
||||
/* For TDX, alias different GPAs to same private memory is not supported */
|
||||
if (!is_tdx_vm()) {
|
||||
/* map the last 128KB of the BIOS in ISA space */
|
||||
isa_bios_size = MIN(bios_size, 128 * KiB);
|
||||
isa_bios = g_malloc(sizeof(*isa_bios));
|
||||
memory_region_init_alias(isa_bios, NULL, "isa-bios", bios,
|
||||
bios_size - isa_bios_size, isa_bios_size);
|
||||
memory_region_add_subregion_overlap(rom_memory,
|
||||
0x100000 - isa_bios_size,
|
||||
isa_bios,
|
||||
1);
|
||||
if (!isapc_ram_fw) {
|
||||
memory_region_set_readonly(isa_bios, true);
|
||||
}
|
||||
}
|
||||
|
||||
/* map all the bios at the top of memory */
|
||||
@@ -1389,6 +1401,17 @@ static void machine_set_sgx_epc(Object *obj, Visitor *v, const char *name,
|
||||
qapi_free_SgxEPCList(list);
|
||||
}
|
||||
|
||||
static int x86_kvm_type(MachineState *ms, const char *vm_type)
|
||||
{
|
||||
X86MachineState *x86ms = X86_MACHINE(ms);
|
||||
int kvm_type;
|
||||
|
||||
kvm_type = kvm_enabled() ? kvm_get_vm_type(ms, vm_type) : 0;
|
||||
x86ms->vm_type = kvm_type;
|
||||
|
||||
return kvm_type;
|
||||
}
|
||||
|
||||
static void x86_machine_initfn(Object *obj)
|
||||
{
|
||||
X86MachineState *x86ms = X86_MACHINE(obj);
|
||||
@@ -1402,6 +1425,7 @@ static void x86_machine_initfn(Object *obj)
|
||||
x86ms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8);
|
||||
x86ms->bus_lock_ratelimit = 0;
|
||||
x86ms->above_4g_mem_start = 4 * GiB;
|
||||
x86ms->eoi_intercept_unsupported = false;
|
||||
}
|
||||
|
||||
static void x86_machine_class_init(ObjectClass *oc, void *data)
|
||||
@@ -1413,6 +1437,7 @@ static void x86_machine_class_init(ObjectClass *oc, void *data)
|
||||
mc->cpu_index_to_instance_props = x86_cpu_index_to_props;
|
||||
mc->get_default_cpu_node_id = x86_get_default_cpu_node_id;
|
||||
mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids;
|
||||
mc->kvm_type = x86_kvm_type;
|
||||
x86mc->save_tsc_khz = true;
|
||||
x86mc->fwcfg_dma_enabled = true;
|
||||
nc->nmi_monitor_handler = x86_nmi;
|
||||
|
||||
@@ -179,6 +179,8 @@ static Property q35_host_props[] = {
|
||||
mch.below_4g_mem_size, 0),
|
||||
DEFINE_PROP_SIZE(PCI_HOST_ABOVE_4G_MEM_SIZE, Q35PCIHost,
|
||||
mch.above_4g_mem_size, 0),
|
||||
DEFINE_PROP_BOOL(PCI_HOST_PROP_SMM_RANGES, Q35PCIHost,
|
||||
mch.has_smm_ranges, true),
|
||||
DEFINE_PROP_BOOL("x-pci-hole64-fix", Q35PCIHost, pci_hole64_fix, true),
|
||||
DEFINE_PROP_END_OF_LIST(),
|
||||
};
|
||||
@@ -214,6 +216,7 @@ static void q35_host_initfn(Object *obj)
|
||||
/* mch's object_initialize resets the default value, set it again */
|
||||
qdev_prop_set_uint64(DEVICE(s), PCI_HOST_PROP_PCI_HOLE64_SIZE,
|
||||
Q35_PCI_HOST_HOLE64_SIZE_DEFAULT);
|
||||
|
||||
object_property_add(obj, PCI_HOST_PROP_PCI_HOLE_START, "uint32",
|
||||
q35_host_get_pci_hole_start,
|
||||
NULL, NULL, NULL);
|
||||
@@ -476,6 +479,10 @@ static void mch_write_config(PCIDevice *d,
|
||||
mch_update_pciexbar(mch);
|
||||
}
|
||||
|
||||
if (!mch->has_smm_ranges) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (ranges_overlap(address, len, MCH_HOST_BRIDGE_SMRAM,
|
||||
MCH_HOST_BRIDGE_SMRAM_SIZE)) {
|
||||
mch_update_smram(mch);
|
||||
@@ -494,10 +501,13 @@ static void mch_write_config(PCIDevice *d,
|
||||
static void mch_update(MCHPCIState *mch)
|
||||
{
|
||||
mch_update_pciexbar(mch);
|
||||
|
||||
mch_update_pam(mch);
|
||||
mch_update_smram(mch);
|
||||
mch_update_ext_tseg_mbytes(mch);
|
||||
mch_update_smbase_smram(mch);
|
||||
if (mch->has_smm_ranges) {
|
||||
mch_update_smram(mch);
|
||||
mch_update_ext_tseg_mbytes(mch);
|
||||
mch_update_smbase_smram(mch);
|
||||
}
|
||||
|
||||
/*
|
||||
* pci hole goes from end-of-low-ram to io-apic.
|
||||
@@ -538,19 +548,21 @@ static void mch_reset(DeviceState *qdev)
|
||||
pci_set_quad(d->config + MCH_HOST_BRIDGE_PCIEXBAR,
|
||||
MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT);
|
||||
|
||||
d->config[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_DEFAULT;
|
||||
d->config[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_DEFAULT;
|
||||
d->wmask[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_WMASK;
|
||||
d->wmask[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_WMASK;
|
||||
if (mch->has_smm_ranges) {
|
||||
d->config[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_DEFAULT;
|
||||
d->config[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_DEFAULT;
|
||||
d->wmask[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_WMASK;
|
||||
d->wmask[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_WMASK;
|
||||
|
||||
if (mch->ext_tseg_mbytes > 0) {
|
||||
pci_set_word(d->config + MCH_HOST_BRIDGE_EXT_TSEG_MBYTES,
|
||||
MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY);
|
||||
if (mch->ext_tseg_mbytes > 0) {
|
||||
pci_set_word(d->config + MCH_HOST_BRIDGE_EXT_TSEG_MBYTES,
|
||||
MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY);
|
||||
}
|
||||
|
||||
d->config[MCH_HOST_BRIDGE_F_SMBASE] = 0;
|
||||
d->wmask[MCH_HOST_BRIDGE_F_SMBASE] = 0xff;
|
||||
}
|
||||
|
||||
d->config[MCH_HOST_BRIDGE_F_SMBASE] = 0;
|
||||
d->wmask[MCH_HOST_BRIDGE_F_SMBASE] = 0xff;
|
||||
|
||||
mch_update(mch);
|
||||
}
|
||||
|
||||
@@ -568,6 +580,20 @@ static void mch_realize(PCIDevice *d, Error **errp)
|
||||
/* setup pci memory mapping */
|
||||
pc_pci_as_mapping_init(mch->system_memory, mch->pci_address_space);
|
||||
|
||||
/* PAM */
|
||||
init_pam(&mch->pam_regions[0], OBJECT(mch), mch->ram_memory,
|
||||
mch->system_memory, mch->pci_address_space,
|
||||
PAM_BIOS_BASE, PAM_BIOS_SIZE);
|
||||
for (i = 0; i < ARRAY_SIZE(mch->pam_regions) - 1; ++i) {
|
||||
init_pam(&mch->pam_regions[i + 1], OBJECT(mch), mch->ram_memory,
|
||||
mch->system_memory, mch->pci_address_space,
|
||||
PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE);
|
||||
}
|
||||
|
||||
if (!mch->has_smm_ranges) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* if *disabled* show SMRAM to all CPUs */
|
||||
memory_region_init_alias(&mch->smram_region, OBJECT(mch), "smram-region",
|
||||
mch->pci_address_space, MCH_HOST_BRIDGE_SMRAM_C_BASE,
|
||||
@@ -634,15 +660,6 @@ static void mch_realize(PCIDevice *d, Error **errp)
|
||||
|
||||
object_property_add_const_link(qdev_get_machine(), "smram",
|
||||
OBJECT(&mch->smram));
|
||||
|
||||
init_pam(&mch->pam_regions[0], OBJECT(mch), mch->ram_memory,
|
||||
mch->system_memory, mch->pci_address_space,
|
||||
PAM_BIOS_BASE, PAM_BIOS_SIZE);
|
||||
for (i = 0; i < ARRAY_SIZE(mch->pam_regions) - 1; ++i) {
|
||||
init_pam(&mch->pam_regions[i + 1], OBJECT(mch), mch->ram_memory,
|
||||
mch->system_memory, mch->pci_address_space,
|
||||
PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t mch_mcfg_base(void)
|
||||
|
||||
@@ -15,7 +15,6 @@
|
||||
#include "sysemu/kvm.h"
|
||||
#include "migration/blocker.h"
|
||||
#include "exec/confidential-guest-support.h"
|
||||
#include "hw/ppc/pef.h"
|
||||
|
||||
#define TYPE_PEF_GUEST "pef-guest"
|
||||
OBJECT_DECLARE_SIMPLE_TYPE(PefGuest, PEF_GUEST)
|
||||
@@ -93,7 +92,7 @@ static int kvmppc_svm_off(Error **errp)
|
||||
#endif
|
||||
}
|
||||
|
||||
int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
static int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
{
|
||||
if (!object_dynamic_cast(OBJECT(cgs), TYPE_PEF_GUEST)) {
|
||||
return 0;
|
||||
@@ -107,7 +106,7 @@ int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
return kvmppc_svm_init(cgs, errp);
|
||||
}
|
||||
|
||||
int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
static int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
{
|
||||
if (!object_dynamic_cast(OBJECT(cgs), TYPE_PEF_GUEST)) {
|
||||
return 0;
|
||||
@@ -131,6 +130,10 @@ OBJECT_DEFINE_TYPE_WITH_INTERFACES(PefGuest,
|
||||
|
||||
static void pef_guest_class_init(ObjectClass *oc, void *data)
|
||||
{
|
||||
ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc);
|
||||
|
||||
klass->kvm_init = pef_kvm_init;
|
||||
klass->kvm_reset = pef_kvm_reset;
|
||||
}
|
||||
|
||||
static void pef_guest_init(Object *obj)
|
||||
|
||||
@@ -74,6 +74,7 @@
|
||||
#include "hw/virtio/vhost-scsi-common.h"
|
||||
|
||||
#include "exec/ram_addr.h"
|
||||
#include "exec/confidential-guest-support.h"
|
||||
#include "hw/usb.h"
|
||||
#include "qemu/config-file.h"
|
||||
#include "qemu/error-report.h"
|
||||
@@ -86,7 +87,6 @@
|
||||
#include "hw/ppc/spapr_tpm_proxy.h"
|
||||
#include "hw/ppc/spapr_nvdimm.h"
|
||||
#include "hw/ppc/spapr_numa.h"
|
||||
#include "hw/ppc/pef.h"
|
||||
|
||||
#include "monitor/monitor.h"
|
||||
|
||||
@@ -1687,7 +1687,9 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason)
|
||||
qemu_guest_getrandom_nofail(spapr->fdt_rng_seed, 32);
|
||||
}
|
||||
|
||||
pef_kvm_reset(machine->cgs, &error_fatal);
|
||||
if (machine->cgs) {
|
||||
confidential_guest_kvm_reset(machine->cgs, &error_fatal);
|
||||
}
|
||||
spapr_caps_apply(spapr);
|
||||
|
||||
first_ppc_cpu = POWERPC_CPU(first_cpu);
|
||||
@@ -2811,7 +2813,9 @@ static void spapr_machine_init(MachineState *machine)
|
||||
/*
|
||||
* if Secure VM (PEF) support is configured, then initialize it
|
||||
*/
|
||||
pef_kvm_init(machine->cgs, &error_fatal);
|
||||
if (machine->cgs) {
|
||||
confidential_guest_kvm_init(machine->cgs, &error_fatal);
|
||||
}
|
||||
|
||||
msi_nonbroken = true;
|
||||
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
#include "qemu/osdep.h"
|
||||
#include "qapi/error.h"
|
||||
#include "exec/ram_addr.h"
|
||||
#include "exec/confidential-guest-support.h"
|
||||
#include "hw/s390x/s390-virtio-hcall.h"
|
||||
#include "hw/s390x/sclp.h"
|
||||
#include "hw/s390x/s390_flic.h"
|
||||
@@ -260,7 +261,9 @@ static void ccw_init(MachineState *machine)
|
||||
s390_init_cpus(machine);
|
||||
|
||||
/* Need CPU model to be determined before we can set up PV */
|
||||
s390_pv_init(machine->cgs, &error_fatal);
|
||||
if (machine->cgs) {
|
||||
confidential_guest_kvm_init(machine->cgs, &error_fatal);
|
||||
}
|
||||
|
||||
s390_flic_init();
|
||||
|
||||
|
||||
@@ -23,7 +23,10 @@
|
||||
#include "qom/object.h"
|
||||
|
||||
#define TYPE_CONFIDENTIAL_GUEST_SUPPORT "confidential-guest-support"
|
||||
OBJECT_DECLARE_SIMPLE_TYPE(ConfidentialGuestSupport, CONFIDENTIAL_GUEST_SUPPORT)
|
||||
OBJECT_DECLARE_TYPE(ConfidentialGuestSupport,
|
||||
ConfidentialGuestSupportClass,
|
||||
CONFIDENTIAL_GUEST_SUPPORT)
|
||||
|
||||
|
||||
struct ConfidentialGuestSupport {
|
||||
Object parent;
|
||||
@@ -55,8 +58,37 @@ struct ConfidentialGuestSupport {
|
||||
|
||||
typedef struct ConfidentialGuestSupportClass {
|
||||
ObjectClass parent;
|
||||
|
||||
int (*kvm_init)(ConfidentialGuestSupport *cgs, Error **errp);
|
||||
int (*kvm_reset)(ConfidentialGuestSupport *cgs, Error **errp);
|
||||
} ConfidentialGuestSupportClass;
|
||||
|
||||
static inline int confidential_guest_kvm_init(ConfidentialGuestSupport *cgs,
|
||||
Error **errp)
|
||||
{
|
||||
ConfidentialGuestSupportClass *klass;
|
||||
|
||||
klass = CONFIDENTIAL_GUEST_SUPPORT_GET_CLASS(cgs);
|
||||
if (klass->kvm_init) {
|
||||
return klass->kvm_init(cgs, errp);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int confidential_guest_kvm_reset(ConfidentialGuestSupport *cgs,
|
||||
Error **errp)
|
||||
{
|
||||
ConfidentialGuestSupportClass *klass;
|
||||
|
||||
klass = CONFIDENTIAL_GUEST_SUPPORT_GET_CLASS(cgs);
|
||||
if (klass->kvm_reset) {
|
||||
return klass->kvm_reset(cgs, errp);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* !CONFIG_USER_ONLY */
|
||||
|
||||
#endif /* QEMU_CONFIDENTIAL_GUEST_SUPPORT_H */
|
||||
|
||||
@@ -166,6 +166,8 @@ typedef int (RAMBlockIterFunc)(RAMBlock *rb, void *opaque);
|
||||
|
||||
int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque);
|
||||
int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length);
|
||||
int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start,
|
||||
size_t length);
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -243,6 +243,9 @@ typedef struct IOMMUTLBEvent {
|
||||
/* RAM FD is opened read-only */
|
||||
#define RAM_READONLY_FD (1 << 11)
|
||||
|
||||
/* RAM can be private that has kvm guest memfd backend */
|
||||
#define RAM_GUEST_MEMFD (1 << 12)
|
||||
|
||||
static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn,
|
||||
IOMMUNotifierFlag flags,
|
||||
hwaddr start, hwaddr end,
|
||||
@@ -1307,7 +1310,8 @@ bool memory_region_init_ram_nomigrate(MemoryRegion *mr,
|
||||
* @name: Region name, becomes part of RAMBlock name used in migration stream
|
||||
* must be unique within any device
|
||||
* @size: size of the region.
|
||||
* @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_NORESERVE.
|
||||
* @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_NORESERVE,
|
||||
* RAM_GUEST_MEMFD.
|
||||
* @errp: pointer to Error*, to store an error if it happens.
|
||||
*
|
||||
* Note that this function does not do anything to cause the data in the
|
||||
@@ -1369,7 +1373,7 @@ bool memory_region_init_resizeable_ram(MemoryRegion *mr,
|
||||
* (getpagesize()) will be used.
|
||||
* @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
|
||||
* RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
|
||||
* RAM_READONLY_FD
|
||||
* RAM_READONLY_FD, RAM_GUEST_MEMFD
|
||||
* @path: the path in which to allocate the RAM.
|
||||
* @offset: offset within the file referenced by path
|
||||
* @errp: pointer to Error*, to store an error if it happens.
|
||||
@@ -1399,7 +1403,7 @@ bool memory_region_init_ram_from_file(MemoryRegion *mr,
|
||||
* @size: size of the region.
|
||||
* @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
|
||||
* RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
|
||||
* RAM_READONLY_FD
|
||||
* RAM_READONLY_FD, RAM_GUEST_MEMFD
|
||||
* @fd: the fd to mmap.
|
||||
* @offset: offset within the file referenced by fd
|
||||
* @errp: pointer to Error*, to store an error if it happens.
|
||||
@@ -1599,6 +1603,12 @@ bool memory_region_init_ram(MemoryRegion *mr,
|
||||
uint64_t size,
|
||||
Error **errp);
|
||||
|
||||
bool memory_region_init_ram_guest_memfd(MemoryRegion *mr,
|
||||
Object *owner,
|
||||
const char *name,
|
||||
uint64_t size,
|
||||
Error **errp);
|
||||
|
||||
/**
|
||||
* memory_region_init_rom: Initialize a ROM memory region.
|
||||
*
|
||||
@@ -1722,6 +1732,16 @@ static inline bool memory_region_is_romd(MemoryRegion *mr)
|
||||
*/
|
||||
bool memory_region_is_protected(MemoryRegion *mr);
|
||||
|
||||
/**
|
||||
* memory_region_has_guest_memfd: check whether a memory region has guest_memfd
|
||||
* associated
|
||||
*
|
||||
* Returns %true if a memory region's ram_block has valid guest_memfd assigned.
|
||||
*
|
||||
* @mr: the memory region being queried
|
||||
*/
|
||||
bool memory_region_has_guest_memfd(MemoryRegion *mr);
|
||||
|
||||
/**
|
||||
* memory_region_get_iommu: check whether a memory region is an iommu
|
||||
*
|
||||
|
||||
@@ -109,7 +109,7 @@ long qemu_maxrampagesize(void);
|
||||
* @mr: the memory region where the ram block is
|
||||
* @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
|
||||
* RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
|
||||
* RAM_READONLY_FD
|
||||
* RAM_READONLY_FD, RAM_GUEST_MEMFD
|
||||
* @mem_path or @fd: specify the backing file or device
|
||||
* @offset: Offset into target file
|
||||
* @errp: pointer to Error*, to store an error if it happens
|
||||
|
||||
@@ -41,6 +41,7 @@ struct RAMBlock {
|
||||
QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers;
|
||||
int fd;
|
||||
uint64_t fd_offset;
|
||||
int guest_memfd;
|
||||
size_t page_size;
|
||||
/* dirty bitmap used during migration */
|
||||
unsigned long *bmap;
|
||||
|
||||
@@ -36,6 +36,7 @@ bool machine_usb(MachineState *machine);
|
||||
int machine_phandle_start(MachineState *machine);
|
||||
bool machine_dump_guest_core(MachineState *machine);
|
||||
bool machine_mem_merge(MachineState *machine);
|
||||
bool machine_require_guest_memfd(MachineState *machine);
|
||||
HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine);
|
||||
void machine_set_cpu_numa_node(MachineState *machine,
|
||||
const CpuInstanceProperties *props,
|
||||
@@ -370,6 +371,7 @@ struct MachineState {
|
||||
char *dt_compatible;
|
||||
bool dump_guest_core;
|
||||
bool mem_merge;
|
||||
bool require_guest_memfd;
|
||||
bool usb;
|
||||
bool usb_disabled;
|
||||
char *firmware;
|
||||
|
||||
@@ -161,6 +161,7 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level);
|
||||
#define PCI_HOST_PROP_PCI_HOLE64_SIZE "pci-hole64-size"
|
||||
#define PCI_HOST_BELOW_4G_MEM_SIZE "below-4g-mem-size"
|
||||
#define PCI_HOST_ABOVE_4G_MEM_SIZE "above-4g-mem-size"
|
||||
#define PCI_HOST_PROP_SMM_RANGES "smm-ranges"
|
||||
|
||||
|
||||
void pc_pci_as_mapping_init(MemoryRegion *system_memory,
|
||||
|
||||
58
include/hw/i386/tdvf.h
Normal file
58
include/hw/i386/tdvf.h
Normal file
@@ -0,0 +1,58 @@
|
||||
/*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
* Copyright (c) 2020 Intel Corporation
|
||||
* Author: Isaku Yamahata <isaku.yamahata at gmail.com>
|
||||
* <isaku.yamahata at intel.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with this program; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef HW_I386_TDVF_H
|
||||
#define HW_I386_TDVF_H
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
|
||||
#define TDVF_SECTION_TYPE_BFV 0
|
||||
#define TDVF_SECTION_TYPE_CFV 1
|
||||
#define TDVF_SECTION_TYPE_TD_HOB 2
|
||||
#define TDVF_SECTION_TYPE_TEMP_MEM 3
|
||||
|
||||
#define TDVF_SECTION_ATTRIBUTES_MR_EXTEND (1U << 0)
|
||||
#define TDVF_SECTION_ATTRIBUTES_PAGE_AUG (1U << 1)
|
||||
|
||||
typedef struct TdxFirmwareEntry {
|
||||
uint32_t data_offset;
|
||||
uint32_t data_len;
|
||||
uint64_t address;
|
||||
uint64_t size;
|
||||
uint32_t type;
|
||||
uint32_t attributes;
|
||||
|
||||
void *mem_ptr;
|
||||
} TdxFirmwareEntry;
|
||||
|
||||
typedef struct TdxFirmware {
|
||||
void *mem_ptr;
|
||||
|
||||
uint32_t nr_entries;
|
||||
TdxFirmwareEntry *entries;
|
||||
} TdxFirmware;
|
||||
|
||||
#define for_each_tdx_fw_entry(fw, e) \
|
||||
for (e = (fw)->entries; e != (fw)->entries + (fw)->nr_entries; e++)
|
||||
|
||||
int tdvf_parse_metadata(TdxFirmware *fw, void *flash_ptr, int size);
|
||||
|
||||
#endif /* HW_I386_TDVF_H */
|
||||
@@ -43,6 +43,7 @@ struct X86MachineState {
|
||||
MachineState parent;
|
||||
|
||||
/*< public >*/
|
||||
unsigned int vm_type;
|
||||
|
||||
/* Pointers to devices and objects: */
|
||||
ISADevice *rtc;
|
||||
@@ -59,6 +60,7 @@ struct X86MachineState {
|
||||
uint64_t above_4g_mem_start;
|
||||
|
||||
/* CPU and apic information: */
|
||||
bool eoi_intercept_unsupported;
|
||||
unsigned pci_irq_mask;
|
||||
unsigned apic_id_limit;
|
||||
uint16_t boot_cpus;
|
||||
|
||||
@@ -50,6 +50,7 @@ struct MCHPCIState {
|
||||
MemoryRegion tseg_blackhole, tseg_window;
|
||||
MemoryRegion smbase_blackhole, smbase_window;
|
||||
bool has_smram_at_smbase;
|
||||
bool has_smm_ranges;
|
||||
Range pci_hole;
|
||||
uint64_t below_4g_mem_size;
|
||||
uint64_t above_4g_mem_size;
|
||||
|
||||
@@ -1,17 +0,0 @@
|
||||
/*
|
||||
* PEF (Protected Execution Facility) for POWER support
|
||||
*
|
||||
* Copyright Red Hat.
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2 or later.
|
||||
* See the COPYING file in the top-level directory.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef HW_PPC_PEF_H
|
||||
#define HW_PPC_PEF_H
|
||||
|
||||
int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp);
|
||||
int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error **errp);
|
||||
|
||||
#endif /* HW_PPC_PEF_H */
|
||||
@@ -53,7 +53,7 @@ extern "C" {
|
||||
* Format modifiers may change any property of the buffer, including the number
|
||||
* of planes and/or the required allocation size. Format modifiers are
|
||||
* vendor-namespaced, and as such the relationship between a fourcc code and a
|
||||
* modifier is specific to the modifer being used. For example, some modifiers
|
||||
* modifier is specific to the modifier being used. For example, some modifiers
|
||||
* may preserve meaning - such as number of planes - from the fourcc code,
|
||||
* whereas others may not.
|
||||
*
|
||||
@@ -78,7 +78,7 @@ extern "C" {
|
||||
* format.
|
||||
* - Higher-level programs interfacing with KMS/GBM/EGL/Vulkan/etc: these users
|
||||
* see modifiers as opaque tokens they can check for equality and intersect.
|
||||
* These users musn't need to know to reason about the modifier value
|
||||
* These users mustn't need to know to reason about the modifier value
|
||||
* (i.e. they are not expected to extract information out of the modifier).
|
||||
*
|
||||
* Vendors should document their modifier usage in as much detail as
|
||||
@@ -539,7 +539,7 @@ extern "C" {
|
||||
* This is a tiled layout using 4Kb tiles in row-major layout.
|
||||
* Within the tile pixels are laid out in 16 256 byte units / sub-tiles which
|
||||
* are arranged in four groups (two wide, two high) with column-major layout.
|
||||
* Each group therefore consits out of four 256 byte units, which are also laid
|
||||
* Each group therefore consists out of four 256 byte units, which are also laid
|
||||
* out as 2x2 column-major.
|
||||
* 256 byte units are made out of four 64 byte blocks of pixels, producing
|
||||
* either a square block or a 2:1 unit.
|
||||
@@ -1102,7 +1102,7 @@ drm_fourcc_canonicalize_nvidia_format_mod(uint64_t modifier)
|
||||
*/
|
||||
|
||||
/*
|
||||
* The top 4 bits (out of the 56 bits alloted for specifying vendor specific
|
||||
* The top 4 bits (out of the 56 bits allotted for specifying vendor specific
|
||||
* modifiers) denote the category for modifiers. Currently we have three
|
||||
* categories of modifiers ie AFBC, MISC and AFRC. We can have a maximum of
|
||||
* sixteen different categories.
|
||||
@@ -1418,7 +1418,7 @@ drm_fourcc_canonicalize_nvidia_format_mod(uint64_t modifier)
|
||||
* Amlogic FBC Memory Saving mode
|
||||
*
|
||||
* Indicates the storage is packed when pixel size is multiple of word
|
||||
* boudaries, i.e. 8bit should be stored in this mode to save allocation
|
||||
* boundaries, i.e. 8bit should be stored in this mode to save allocation
|
||||
* memory.
|
||||
*
|
||||
* This mode reduces body layout to 3072 bytes per 64x32 superblock with
|
||||
|
||||
@@ -1266,6 +1266,8 @@ struct ethtool_rxfh_indir {
|
||||
* hardware hash key.
|
||||
* @hfunc: Defines the current RSS hash function used by HW (or to be set to).
|
||||
* Valid values are one of the %ETH_RSS_HASH_*.
|
||||
* @input_xfrm: Defines how the input data is transformed. Valid values are one
|
||||
* of %RXH_XFRM_*.
|
||||
* @rsvd8: Reserved for future use; see the note on reserved space.
|
||||
* @rsvd32: Reserved for future use; see the note on reserved space.
|
||||
* @rss_config: RX ring/queue index for each hash value i.e., indirection table
|
||||
@@ -1285,7 +1287,8 @@ struct ethtool_rxfh {
|
||||
uint32_t indir_size;
|
||||
uint32_t key_size;
|
||||
uint8_t hfunc;
|
||||
uint8_t rsvd8[3];
|
||||
uint8_t input_xfrm;
|
||||
uint8_t rsvd8[2];
|
||||
uint32_t rsvd32;
|
||||
uint32_t rss_config[];
|
||||
};
|
||||
@@ -1992,6 +1995,15 @@ static inline int ethtool_validate_duplex(uint8_t duplex)
|
||||
|
||||
#define WOL_MODE_COUNT 8
|
||||
|
||||
/* RSS hash function data
|
||||
* XOR the corresponding source and destination fields of each specified
|
||||
* protocol. Both copies of the XOR'ed fields are fed into the RSS and RXHASH
|
||||
* calculation. Note that this XORing reduces the input set entropy and could
|
||||
* be exploited to reduce the RSS queue spread.
|
||||
*/
|
||||
#define RXH_XFRM_SYM_XOR (1 << 0)
|
||||
#define RXH_XFRM_NO_CHANGE 0xff
|
||||
|
||||
/* L2-L4 network traffic flow types */
|
||||
#define TCP_V4_FLOW 0x01 /* hash or spec (tcp_ip4_spec) */
|
||||
#define UDP_V4_FLOW 0x02 /* hash or spec (udp_ip4_spec) */
|
||||
@@ -2128,18 +2140,6 @@ enum ethtool_reset_flags {
|
||||
* refused. For drivers: ignore this field (use kernel's
|
||||
* __ETHTOOL_LINK_MODE_MASK_NBITS instead), any change to it will
|
||||
* be overwritten by kernel.
|
||||
* @supported: Bitmap with each bit meaning given by
|
||||
* %ethtool_link_mode_bit_indices for the link modes, physical
|
||||
* connectors and other link features for which the interface
|
||||
* supports autonegotiation or auto-detection. Read-only.
|
||||
* @advertising: Bitmap with each bit meaning given by
|
||||
* %ethtool_link_mode_bit_indices for the link modes, physical
|
||||
* connectors and other link features that are advertised through
|
||||
* autonegotiation or enabled for auto-detection.
|
||||
* @lp_advertising: Bitmap with each bit meaning given by
|
||||
* %ethtool_link_mode_bit_indices for the link modes, and other
|
||||
* link features that the link partner advertised through
|
||||
* autonegotiation; 0 if unknown or not applicable. Read-only.
|
||||
* @transceiver: Used to distinguish different possible PHY types,
|
||||
* reported consistently by PHYLIB. Read-only.
|
||||
* @master_slave_cfg: Master/slave port mode.
|
||||
@@ -2181,6 +2181,21 @@ enum ethtool_reset_flags {
|
||||
* %set_link_ksettings() should validate all fields other than @cmd
|
||||
* and @link_mode_masks_nwords that are not described as read-only or
|
||||
* deprecated, and must ignore all fields described as read-only.
|
||||
*
|
||||
* @link_mode_masks is divided into three bitfields, each of length
|
||||
* @link_mode_masks_nwords:
|
||||
* - supported: Bitmap with each bit meaning given by
|
||||
* %ethtool_link_mode_bit_indices for the link modes, physical
|
||||
* connectors and other link features for which the interface
|
||||
* supports autonegotiation or auto-detection. Read-only.
|
||||
* - advertising: Bitmap with each bit meaning given by
|
||||
* %ethtool_link_mode_bit_indices for the link modes, physical
|
||||
* connectors and other link features that are advertised through
|
||||
* autonegotiation or enabled for auto-detection.
|
||||
* - lp_advertising: Bitmap with each bit meaning given by
|
||||
* %ethtool_link_mode_bit_indices for the link modes, and other
|
||||
* link features that the link partner advertised through
|
||||
* autonegotiation; 0 if unknown or not applicable. Read-only.
|
||||
*/
|
||||
struct ethtool_link_settings {
|
||||
uint32_t cmd;
|
||||
|
||||
@@ -52,7 +52,7 @@
|
||||
* rest are per-device feature bits.
|
||||
*/
|
||||
#define VIRTIO_TRANSPORT_F_START 28
|
||||
#define VIRTIO_TRANSPORT_F_END 41
|
||||
#define VIRTIO_TRANSPORT_F_END 42
|
||||
|
||||
#ifndef VIRTIO_CONFIG_NO_LEGACY
|
||||
/* Do we get callbacks when the ring is completely used, even if we've
|
||||
@@ -112,4 +112,10 @@
|
||||
* This feature indicates that the driver can reset a queue individually.
|
||||
*/
|
||||
#define VIRTIO_F_RING_RESET 40
|
||||
|
||||
/*
|
||||
* This feature indicates that the device support administration virtqueues.
|
||||
*/
|
||||
#define VIRTIO_F_ADMIN_VQ 41
|
||||
|
||||
#endif /* _LINUX_VIRTIO_CONFIG_H */
|
||||
|
||||
@@ -175,6 +175,9 @@ struct virtio_pci_modern_common_cfg {
|
||||
|
||||
uint16_t queue_notify_data; /* read-write */
|
||||
uint16_t queue_reset; /* read-write */
|
||||
|
||||
uint16_t admin_queue_index; /* read-only */
|
||||
uint16_t admin_queue_num; /* read-only */
|
||||
};
|
||||
|
||||
/* Fields in VIRTIO_PCI_CAP_PCI_CFG: */
|
||||
@@ -215,7 +218,72 @@ struct virtio_pci_cfg_cap {
|
||||
#define VIRTIO_PCI_COMMON_Q_USEDHI 52
|
||||
#define VIRTIO_PCI_COMMON_Q_NDATA 56
|
||||
#define VIRTIO_PCI_COMMON_Q_RESET 58
|
||||
#define VIRTIO_PCI_COMMON_ADM_Q_IDX 60
|
||||
#define VIRTIO_PCI_COMMON_ADM_Q_NUM 62
|
||||
|
||||
#endif /* VIRTIO_PCI_NO_MODERN */
|
||||
|
||||
/* Admin command status. */
|
||||
#define VIRTIO_ADMIN_STATUS_OK 0
|
||||
|
||||
/* Admin command opcode. */
|
||||
#define VIRTIO_ADMIN_CMD_LIST_QUERY 0x0
|
||||
#define VIRTIO_ADMIN_CMD_LIST_USE 0x1
|
||||
|
||||
/* Admin command group type. */
|
||||
#define VIRTIO_ADMIN_GROUP_TYPE_SRIOV 0x1
|
||||
|
||||
/* Transitional device admin command. */
|
||||
#define VIRTIO_ADMIN_CMD_LEGACY_COMMON_CFG_WRITE 0x2
|
||||
#define VIRTIO_ADMIN_CMD_LEGACY_COMMON_CFG_READ 0x3
|
||||
#define VIRTIO_ADMIN_CMD_LEGACY_DEV_CFG_WRITE 0x4
|
||||
#define VIRTIO_ADMIN_CMD_LEGACY_DEV_CFG_READ 0x5
|
||||
#define VIRTIO_ADMIN_CMD_LEGACY_NOTIFY_INFO 0x6
|
||||
|
||||
struct QEMU_PACKED virtio_admin_cmd_hdr {
|
||||
uint16_t opcode;
|
||||
/*
|
||||
* 1 - SR-IOV
|
||||
* 2-65535 - reserved
|
||||
*/
|
||||
uint16_t group_type;
|
||||
/* Unused, reserved for future extensions. */
|
||||
uint8_t reserved1[12];
|
||||
uint64_t group_member_id;
|
||||
};
|
||||
|
||||
struct QEMU_PACKED virtio_admin_cmd_status {
|
||||
uint16_t status;
|
||||
uint16_t status_qualifier;
|
||||
/* Unused, reserved for future extensions. */
|
||||
uint8_t reserved2[4];
|
||||
};
|
||||
|
||||
struct QEMU_PACKED virtio_admin_cmd_legacy_wr_data {
|
||||
uint8_t offset; /* Starting offset of the register(s) to write. */
|
||||
uint8_t reserved[7];
|
||||
uint8_t registers[];
|
||||
};
|
||||
|
||||
struct QEMU_PACKED virtio_admin_cmd_legacy_rd_data {
|
||||
uint8_t offset; /* Starting offset of the register(s) to read. */
|
||||
};
|
||||
|
||||
#define VIRTIO_ADMIN_CMD_NOTIFY_INFO_FLAGS_END 0
|
||||
#define VIRTIO_ADMIN_CMD_NOTIFY_INFO_FLAGS_OWNER_DEV 0x1
|
||||
#define VIRTIO_ADMIN_CMD_NOTIFY_INFO_FLAGS_OWNER_MEM 0x2
|
||||
|
||||
#define VIRTIO_ADMIN_CMD_MAX_NOTIFY_INFO 4
|
||||
|
||||
struct QEMU_PACKED virtio_admin_cmd_notify_info_data {
|
||||
uint8_t flags; /* 0 = end of list, 1 = owner device, 2 = member device */
|
||||
uint8_t bar; /* BAR of the member or the owner device */
|
||||
uint8_t padding[6];
|
||||
uint64_t offset; /* Offset within bar. */
|
||||
};
|
||||
|
||||
struct virtio_admin_cmd_notify_info_result {
|
||||
struct virtio_admin_cmd_notify_info_data entries[VIRTIO_ADMIN_CMD_MAX_NOTIFY_INFO];
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
@@ -14,6 +14,13 @@
|
||||
#include "standard-headers/linux/virtio_ids.h"
|
||||
#include "standard-headers/linux/virtio_config.h"
|
||||
|
||||
/* Feature bits */
|
||||
/* guest physical address range will be indicated as shared memory region 0 */
|
||||
#define VIRTIO_PMEM_F_SHMEM_REGION 0
|
||||
|
||||
/* shmid of the shared memory region corresponding to the pmem */
|
||||
#define VIRTIO_PMEM_SHMEM_REGION_ID 0
|
||||
|
||||
struct virtio_pmem_config {
|
||||
uint64_t start;
|
||||
uint64_t size;
|
||||
|
||||
198
include/standard-headers/uefi/uefi.h
Normal file
198
include/standard-headers/uefi/uefi.h
Normal file
@@ -0,0 +1,198 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
*
|
||||
* Author: Isaku Yamahata <isaku.yamahata at gmail.com>
|
||||
* <isaku.yamahata at intel.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with this program; if not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef HW_I386_UEFI_H
|
||||
#define HW_I386_UEFI_H
|
||||
|
||||
/***************************************************************************/
|
||||
/*
|
||||
* basic EFI definitions
|
||||
* supplemented with UEFI Specification Version 2.8 (Errata A)
|
||||
* released February 2020
|
||||
*/
|
||||
/* UEFI integer is little endian */
|
||||
|
||||
typedef struct {
|
||||
uint32_t Data1;
|
||||
uint16_t Data2;
|
||||
uint16_t Data3;
|
||||
uint8_t Data4[8];
|
||||
} EFI_GUID;
|
||||
|
||||
typedef enum {
|
||||
EfiReservedMemoryType,
|
||||
EfiLoaderCode,
|
||||
EfiLoaderData,
|
||||
EfiBootServicesCode,
|
||||
EfiBootServicesData,
|
||||
EfiRuntimeServicesCode,
|
||||
EfiRuntimeServicesData,
|
||||
EfiConventionalMemory,
|
||||
EfiUnusableMemory,
|
||||
EfiACPIReclaimMemory,
|
||||
EfiACPIMemoryNVS,
|
||||
EfiMemoryMappedIO,
|
||||
EfiMemoryMappedIOPortSpace,
|
||||
EfiPalCode,
|
||||
EfiPersistentMemory,
|
||||
EfiUnacceptedMemoryType,
|
||||
EfiMaxMemoryType
|
||||
} EFI_MEMORY_TYPE;
|
||||
|
||||
#define EFI_HOB_HANDOFF_TABLE_VERSION 0x0009
|
||||
|
||||
#define EFI_HOB_TYPE_HANDOFF 0x0001
|
||||
#define EFI_HOB_TYPE_MEMORY_ALLOCATION 0x0002
|
||||
#define EFI_HOB_TYPE_RESOURCE_DESCRIPTOR 0x0003
|
||||
#define EFI_HOB_TYPE_GUID_EXTENSION 0x0004
|
||||
#define EFI_HOB_TYPE_FV 0x0005
|
||||
#define EFI_HOB_TYPE_CPU 0x0006
|
||||
#define EFI_HOB_TYPE_MEMORY_POOL 0x0007
|
||||
#define EFI_HOB_TYPE_FV2 0x0009
|
||||
#define EFI_HOB_TYPE_LOAD_PEIM_UNUSED 0x000A
|
||||
#define EFI_HOB_TYPE_UEFI_CAPSULE 0x000B
|
||||
#define EFI_HOB_TYPE_FV3 0x000C
|
||||
#define EFI_HOB_TYPE_UNUSED 0xFFFE
|
||||
#define EFI_HOB_TYPE_END_OF_HOB_LIST 0xFFFF
|
||||
|
||||
typedef struct {
|
||||
uint16_t HobType;
|
||||
uint16_t HobLength;
|
||||
uint32_t Reserved;
|
||||
} EFI_HOB_GENERIC_HEADER;
|
||||
|
||||
typedef uint64_t EFI_PHYSICAL_ADDRESS;
|
||||
typedef uint32_t EFI_BOOT_MODE;
|
||||
|
||||
typedef struct {
|
||||
EFI_HOB_GENERIC_HEADER Header;
|
||||
uint32_t Version;
|
||||
EFI_BOOT_MODE BootMode;
|
||||
EFI_PHYSICAL_ADDRESS EfiMemoryTop;
|
||||
EFI_PHYSICAL_ADDRESS EfiMemoryBottom;
|
||||
EFI_PHYSICAL_ADDRESS EfiFreeMemoryTop;
|
||||
EFI_PHYSICAL_ADDRESS EfiFreeMemoryBottom;
|
||||
EFI_PHYSICAL_ADDRESS EfiEndOfHobList;
|
||||
} EFI_HOB_HANDOFF_INFO_TABLE;
|
||||
|
||||
#define EFI_RESOURCE_SYSTEM_MEMORY 0x00000000
|
||||
#define EFI_RESOURCE_MEMORY_MAPPED_IO 0x00000001
|
||||
#define EFI_RESOURCE_IO 0x00000002
|
||||
#define EFI_RESOURCE_FIRMWARE_DEVICE 0x00000003
|
||||
#define EFI_RESOURCE_MEMORY_MAPPED_IO_PORT 0x00000004
|
||||
#define EFI_RESOURCE_MEMORY_RESERVED 0x00000005
|
||||
#define EFI_RESOURCE_IO_RESERVED 0x00000006
|
||||
#define EFI_RESOURCE_MEMORY_UNACCEPTED 0x00000007
|
||||
#define EFI_RESOURCE_MAX_MEMORY_TYPE 0x00000008
|
||||
|
||||
#define EFI_RESOURCE_ATTRIBUTE_PRESENT 0x00000001
|
||||
#define EFI_RESOURCE_ATTRIBUTE_INITIALIZED 0x00000002
|
||||
#define EFI_RESOURCE_ATTRIBUTE_TESTED 0x00000004
|
||||
#define EFI_RESOURCE_ATTRIBUTE_SINGLE_BIT_ECC 0x00000008
|
||||
#define EFI_RESOURCE_ATTRIBUTE_MULTIPLE_BIT_ECC 0x00000010
|
||||
#define EFI_RESOURCE_ATTRIBUTE_ECC_RESERVED_1 0x00000020
|
||||
#define EFI_RESOURCE_ATTRIBUTE_ECC_RESERVED_2 0x00000040
|
||||
#define EFI_RESOURCE_ATTRIBUTE_READ_PROTECTED 0x00000080
|
||||
#define EFI_RESOURCE_ATTRIBUTE_WRITE_PROTECTED 0x00000100
|
||||
#define EFI_RESOURCE_ATTRIBUTE_EXECUTION_PROTECTED 0x00000200
|
||||
#define EFI_RESOURCE_ATTRIBUTE_UNCACHEABLE 0x00000400
|
||||
#define EFI_RESOURCE_ATTRIBUTE_WRITE_COMBINEABLE 0x00000800
|
||||
#define EFI_RESOURCE_ATTRIBUTE_WRITE_THROUGH_CACHEABLE 0x00001000
|
||||
#define EFI_RESOURCE_ATTRIBUTE_WRITE_BACK_CACHEABLE 0x00002000
|
||||
#define EFI_RESOURCE_ATTRIBUTE_16_BIT_IO 0x00004000
|
||||
#define EFI_RESOURCE_ATTRIBUTE_32_BIT_IO 0x00008000
|
||||
#define EFI_RESOURCE_ATTRIBUTE_64_BIT_IO 0x00010000
|
||||
#define EFI_RESOURCE_ATTRIBUTE_UNCACHED_EXPORTED 0x00020000
|
||||
#define EFI_RESOURCE_ATTRIBUTE_READ_ONLY_PROTECTED 0x00040000
|
||||
#define EFI_RESOURCE_ATTRIBUTE_READ_ONLY_PROTECTABLE 0x00080000
|
||||
#define EFI_RESOURCE_ATTRIBUTE_READ_PROTECTABLE 0x00100000
|
||||
#define EFI_RESOURCE_ATTRIBUTE_WRITE_PROTECTABLE 0x00200000
|
||||
#define EFI_RESOURCE_ATTRIBUTE_EXECUTION_PROTECTABLE 0x00400000
|
||||
#define EFI_RESOURCE_ATTRIBUTE_PERSISTENT 0x00800000
|
||||
#define EFI_RESOURCE_ATTRIBUTE_PERSISTABLE 0x01000000
|
||||
#define EFI_RESOURCE_ATTRIBUTE_MORE_RELIABLE 0x02000000
|
||||
|
||||
typedef uint32_t EFI_RESOURCE_TYPE;
|
||||
typedef uint32_t EFI_RESOURCE_ATTRIBUTE_TYPE;
|
||||
|
||||
typedef struct {
|
||||
EFI_HOB_GENERIC_HEADER Header;
|
||||
EFI_GUID Owner;
|
||||
EFI_RESOURCE_TYPE ResourceType;
|
||||
EFI_RESOURCE_ATTRIBUTE_TYPE ResourceAttribute;
|
||||
EFI_PHYSICAL_ADDRESS PhysicalStart;
|
||||
uint64_t ResourceLength;
|
||||
} EFI_HOB_RESOURCE_DESCRIPTOR;
|
||||
|
||||
typedef struct {
|
||||
EFI_HOB_GENERIC_HEADER Header;
|
||||
EFI_GUID Name;
|
||||
|
||||
/* guid specific data follows */
|
||||
} EFI_HOB_GUID_TYPE;
|
||||
|
||||
typedef struct {
|
||||
EFI_HOB_GENERIC_HEADER Header;
|
||||
EFI_PHYSICAL_ADDRESS BaseAddress;
|
||||
uint64_t Length;
|
||||
} EFI_HOB_FIRMWARE_VOLUME;
|
||||
|
||||
typedef struct {
|
||||
EFI_HOB_GENERIC_HEADER Header;
|
||||
EFI_PHYSICAL_ADDRESS BaseAddress;
|
||||
uint64_t Length;
|
||||
EFI_GUID FvName;
|
||||
EFI_GUID FileName;
|
||||
} EFI_HOB_FIRMWARE_VOLUME2;
|
||||
|
||||
typedef struct {
|
||||
EFI_HOB_GENERIC_HEADER Header;
|
||||
EFI_PHYSICAL_ADDRESS BaseAddress;
|
||||
uint64_t Length;
|
||||
uint32_t AuthenticationStatus;
|
||||
bool ExtractedFv;
|
||||
EFI_GUID FvName;
|
||||
EFI_GUID FileName;
|
||||
} EFI_HOB_FIRMWARE_VOLUME3;
|
||||
|
||||
typedef struct {
|
||||
EFI_HOB_GENERIC_HEADER Header;
|
||||
uint8_t SizeOfMemorySpace;
|
||||
uint8_t SizeOfIoSpace;
|
||||
uint8_t Reserved[6];
|
||||
} EFI_HOB_CPU;
|
||||
|
||||
typedef struct {
|
||||
EFI_HOB_GENERIC_HEADER Header;
|
||||
} EFI_HOB_MEMORY_POOL;
|
||||
|
||||
typedef struct {
|
||||
EFI_HOB_GENERIC_HEADER Header;
|
||||
|
||||
EFI_PHYSICAL_ADDRESS BaseAddress;
|
||||
uint64_t Length;
|
||||
} EFI_HOB_UEFI_CAPSULE;
|
||||
|
||||
#define EFI_HOB_OWNER_ZERO \
|
||||
((EFI_GUID){ 0x00000000, 0x0000, 0x0000, \
|
||||
{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } })
|
||||
|
||||
#endif
|
||||
@@ -74,6 +74,7 @@ struct HostMemoryBackend {
|
||||
uint64_t size;
|
||||
bool merge, dump, use_canonical_path;
|
||||
bool prealloc, is_mapped, share, reserve;
|
||||
bool guest_memfd;
|
||||
uint32_t prealloc_threads;
|
||||
ThreadContext *prealloc_context;
|
||||
DECLARE_BITMAP(host_nodes, MAX_NODES + 1);
|
||||
|
||||
@@ -341,6 +341,7 @@ int kvm_arch_get_default_type(MachineState *ms);
|
||||
|
||||
int kvm_arch_init(MachineState *ms, KVMState *s);
|
||||
|
||||
int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp);
|
||||
int kvm_arch_init_vcpu(CPUState *cpu);
|
||||
int kvm_arch_destroy_vcpu(CPUState *cpu);
|
||||
|
||||
@@ -544,4 +545,11 @@ uint32_t kvm_dirty_ring_size(void);
|
||||
* reported for the VM.
|
||||
*/
|
||||
bool kvm_hwpoisoned_mem(void);
|
||||
|
||||
int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp);
|
||||
|
||||
int kvm_set_memory_attributes_private(hwaddr start, hwaddr size);
|
||||
int kvm_set_memory_attributes_shared(hwaddr start, hwaddr size);
|
||||
|
||||
int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private);
|
||||
#endif
|
||||
|
||||
@@ -30,6 +30,8 @@ typedef struct KVMSlot
|
||||
int as_id;
|
||||
/* Cache of the offset in ram address space */
|
||||
ram_addr_t ram_start_offset;
|
||||
int guest_memfd;
|
||||
hwaddr guest_memfd_offset;
|
||||
} KVMSlot;
|
||||
|
||||
typedef struct KVMMemoryUpdate {
|
||||
|
||||
@@ -829,8 +829,21 @@ __SYSCALL(__NR_futex_wait, sys_futex_wait)
|
||||
#define __NR_futex_requeue 456
|
||||
__SYSCALL(__NR_futex_requeue, sys_futex_requeue)
|
||||
|
||||
#define __NR_statmount 457
|
||||
__SYSCALL(__NR_statmount, sys_statmount)
|
||||
|
||||
#define __NR_listmount 458
|
||||
__SYSCALL(__NR_listmount, sys_listmount)
|
||||
|
||||
#define __NR_lsm_get_self_attr 459
|
||||
__SYSCALL(__NR_lsm_get_self_attr, sys_lsm_get_self_attr)
|
||||
#define __NR_lsm_set_self_attr 460
|
||||
__SYSCALL(__NR_lsm_set_self_attr, sys_lsm_set_self_attr)
|
||||
#define __NR_lsm_list_modules 461
|
||||
__SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules)
|
||||
|
||||
#undef __NR_syscalls
|
||||
#define __NR_syscalls 457
|
||||
#define __NR_syscalls 462
|
||||
|
||||
/*
|
||||
* 32 bit systems traditionally used different
|
||||
|
||||
@@ -88,7 +88,7 @@
|
||||
#define MADV_HUGEPAGE 14 /* Worth backing with hugepages */
|
||||
#define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages */
|
||||
|
||||
#define MADV_DONTDUMP 16 /* Explicity exclude from the core dump,
|
||||
#define MADV_DONTDUMP 16 /* Explicitly exclude from core dump,
|
||||
overrides the coredump filter bits */
|
||||
#define MADV_DODUMP 17 /* Clear the MADV_NODUMP flag */
|
||||
|
||||
|
||||
@@ -385,5 +385,10 @@
|
||||
#define __NR_futex_wake (__NR_Linux + 454)
|
||||
#define __NR_futex_wait (__NR_Linux + 455)
|
||||
#define __NR_futex_requeue (__NR_Linux + 456)
|
||||
#define __NR_statmount (__NR_Linux + 457)
|
||||
#define __NR_listmount (__NR_Linux + 458)
|
||||
#define __NR_lsm_get_self_attr (__NR_Linux + 459)
|
||||
#define __NR_lsm_set_self_attr (__NR_Linux + 460)
|
||||
#define __NR_lsm_list_modules (__NR_Linux + 461)
|
||||
|
||||
#endif /* _ASM_UNISTD_N32_H */
|
||||
|
||||
@@ -361,5 +361,10 @@
|
||||
#define __NR_futex_wake (__NR_Linux + 454)
|
||||
#define __NR_futex_wait (__NR_Linux + 455)
|
||||
#define __NR_futex_requeue (__NR_Linux + 456)
|
||||
#define __NR_statmount (__NR_Linux + 457)
|
||||
#define __NR_listmount (__NR_Linux + 458)
|
||||
#define __NR_lsm_get_self_attr (__NR_Linux + 459)
|
||||
#define __NR_lsm_set_self_attr (__NR_Linux + 460)
|
||||
#define __NR_lsm_list_modules (__NR_Linux + 461)
|
||||
|
||||
#endif /* _ASM_UNISTD_N64_H */
|
||||
|
||||
@@ -431,5 +431,10 @@
|
||||
#define __NR_futex_wake (__NR_Linux + 454)
|
||||
#define __NR_futex_wait (__NR_Linux + 455)
|
||||
#define __NR_futex_requeue (__NR_Linux + 456)
|
||||
#define __NR_statmount (__NR_Linux + 457)
|
||||
#define __NR_listmount (__NR_Linux + 458)
|
||||
#define __NR_lsm_get_self_attr (__NR_Linux + 459)
|
||||
#define __NR_lsm_set_self_attr (__NR_Linux + 460)
|
||||
#define __NR_lsm_list_modules (__NR_Linux + 461)
|
||||
|
||||
#endif /* _ASM_UNISTD_O32_H */
|
||||
|
||||
@@ -438,6 +438,11 @@
|
||||
#define __NR_futex_wake 454
|
||||
#define __NR_futex_wait 455
|
||||
#define __NR_futex_requeue 456
|
||||
#define __NR_statmount 457
|
||||
#define __NR_listmount 458
|
||||
#define __NR_lsm_get_self_attr 459
|
||||
#define __NR_lsm_set_self_attr 460
|
||||
#define __NR_lsm_list_modules 461
|
||||
|
||||
|
||||
#endif /* _ASM_UNISTD_32_H */
|
||||
|
||||
@@ -410,6 +410,11 @@
|
||||
#define __NR_futex_wake 454
|
||||
#define __NR_futex_wait 455
|
||||
#define __NR_futex_requeue 456
|
||||
#define __NR_statmount 457
|
||||
#define __NR_listmount 458
|
||||
#define __NR_lsm_get_self_attr 459
|
||||
#define __NR_lsm_set_self_attr 460
|
||||
#define __NR_lsm_list_modules 461
|
||||
|
||||
|
||||
#endif /* _ASM_UNISTD_64_H */
|
||||
|
||||
@@ -139,6 +139,33 @@ enum KVM_RISCV_ISA_EXT_ID {
|
||||
KVM_RISCV_ISA_EXT_ZIHPM,
|
||||
KVM_RISCV_ISA_EXT_SMSTATEEN,
|
||||
KVM_RISCV_ISA_EXT_ZICOND,
|
||||
KVM_RISCV_ISA_EXT_ZBC,
|
||||
KVM_RISCV_ISA_EXT_ZBKB,
|
||||
KVM_RISCV_ISA_EXT_ZBKC,
|
||||
KVM_RISCV_ISA_EXT_ZBKX,
|
||||
KVM_RISCV_ISA_EXT_ZKND,
|
||||
KVM_RISCV_ISA_EXT_ZKNE,
|
||||
KVM_RISCV_ISA_EXT_ZKNH,
|
||||
KVM_RISCV_ISA_EXT_ZKR,
|
||||
KVM_RISCV_ISA_EXT_ZKSED,
|
||||
KVM_RISCV_ISA_EXT_ZKSH,
|
||||
KVM_RISCV_ISA_EXT_ZKT,
|
||||
KVM_RISCV_ISA_EXT_ZVBB,
|
||||
KVM_RISCV_ISA_EXT_ZVBC,
|
||||
KVM_RISCV_ISA_EXT_ZVKB,
|
||||
KVM_RISCV_ISA_EXT_ZVKG,
|
||||
KVM_RISCV_ISA_EXT_ZVKNED,
|
||||
KVM_RISCV_ISA_EXT_ZVKNHA,
|
||||
KVM_RISCV_ISA_EXT_ZVKNHB,
|
||||
KVM_RISCV_ISA_EXT_ZVKSED,
|
||||
KVM_RISCV_ISA_EXT_ZVKSH,
|
||||
KVM_RISCV_ISA_EXT_ZVKT,
|
||||
KVM_RISCV_ISA_EXT_ZFH,
|
||||
KVM_RISCV_ISA_EXT_ZFHMIN,
|
||||
KVM_RISCV_ISA_EXT_ZIHINTNTL,
|
||||
KVM_RISCV_ISA_EXT_ZVFH,
|
||||
KVM_RISCV_ISA_EXT_ZVFHMIN,
|
||||
KVM_RISCV_ISA_EXT_ZFA,
|
||||
KVM_RISCV_ISA_EXT_MAX,
|
||||
};
|
||||
|
||||
@@ -157,9 +184,16 @@ enum KVM_RISCV_SBI_EXT_ID {
|
||||
KVM_RISCV_SBI_EXT_EXPERIMENTAL,
|
||||
KVM_RISCV_SBI_EXT_VENDOR,
|
||||
KVM_RISCV_SBI_EXT_DBCN,
|
||||
KVM_RISCV_SBI_EXT_STA,
|
||||
KVM_RISCV_SBI_EXT_MAX,
|
||||
};
|
||||
|
||||
/* SBI STA extension registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
|
||||
struct kvm_riscv_sbi_sta {
|
||||
unsigned long shmem_lo;
|
||||
unsigned long shmem_hi;
|
||||
};
|
||||
|
||||
/* Possible states for kvm_riscv_timer */
|
||||
#define KVM_RISCV_TIMER_STATE_OFF 0
|
||||
#define KVM_RISCV_TIMER_STATE_ON 1
|
||||
@@ -241,6 +275,12 @@ enum KVM_RISCV_SBI_EXT_ID {
|
||||
#define KVM_REG_RISCV_VECTOR_REG(n) \
|
||||
((n) + sizeof(struct __riscv_v_ext_state) / sizeof(unsigned long))
|
||||
|
||||
/* Registers for specific SBI extensions are mapped as type 10 */
|
||||
#define KVM_REG_RISCV_SBI_STATE (0x0a << KVM_REG_RISCV_TYPE_SHIFT)
|
||||
#define KVM_REG_RISCV_SBI_STA (0x0 << KVM_REG_RISCV_SUBTYPE_SHIFT)
|
||||
#define KVM_REG_RISCV_SBI_STA_REG(name) \
|
||||
(offsetof(struct kvm_riscv_sbi_sta, name) / sizeof(unsigned long))
|
||||
|
||||
/* Device Control API: RISC-V AIA */
|
||||
#define KVM_DEV_RISCV_APLIC_ALIGN 0x1000
|
||||
#define KVM_DEV_RISCV_APLIC_SIZE 0x4000
|
||||
|
||||
@@ -429,5 +429,10 @@
|
||||
#define __NR_futex_wake 454
|
||||
#define __NR_futex_wait 455
|
||||
#define __NR_futex_requeue 456
|
||||
#define __NR_statmount 457
|
||||
#define __NR_listmount 458
|
||||
#define __NR_lsm_get_self_attr 459
|
||||
#define __NR_lsm_set_self_attr 460
|
||||
#define __NR_lsm_list_modules 461
|
||||
|
||||
#endif /* _ASM_S390_UNISTD_32_H */
|
||||
|
||||
@@ -377,5 +377,10 @@
|
||||
#define __NR_futex_wake 454
|
||||
#define __NR_futex_wait 455
|
||||
#define __NR_futex_requeue 456
|
||||
#define __NR_statmount 457
|
||||
#define __NR_listmount 458
|
||||
#define __NR_lsm_get_self_attr 459
|
||||
#define __NR_lsm_set_self_attr 460
|
||||
#define __NR_lsm_list_modules 461
|
||||
|
||||
#endif /* _ASM_S390_UNISTD_64_H */
|
||||
|
||||
@@ -560,4 +560,93 @@ struct kvm_pmu_event_filter {
|
||||
/* x86-specific KVM_EXIT_HYPERCALL flags. */
|
||||
#define KVM_EXIT_HYPERCALL_LONG_MODE BIT(0)
|
||||
|
||||
#define KVM_X86_DEFAULT_VM 0
|
||||
#define KVM_X86_SW_PROTECTED_VM 1
|
||||
#define KVM_X86_TDX_VM 2
|
||||
#define KVM_X86_SNP_VM 3
|
||||
|
||||
/* Trust Domain eXtension sub-ioctl() commands. */
|
||||
enum kvm_tdx_cmd_id {
|
||||
KVM_TDX_CAPABILITIES = 0,
|
||||
KVM_TDX_INIT_VM,
|
||||
KVM_TDX_INIT_VCPU,
|
||||
KVM_TDX_EXTEND_MEMORY,
|
||||
KVM_TDX_FINALIZE_VM,
|
||||
|
||||
KVM_TDX_CMD_NR_MAX,
|
||||
};
|
||||
|
||||
struct kvm_tdx_cmd {
|
||||
/* enum kvm_tdx_cmd_id */
|
||||
__u32 id;
|
||||
/* flags for sub-commend. If sub-command doesn't use this, set zero. */
|
||||
__u32 flags;
|
||||
/*
|
||||
* data for each sub-command. An immediate or a pointer to the actual
|
||||
* data in process virtual address. If sub-command doesn't use it,
|
||||
* set zero.
|
||||
*/
|
||||
__u64 data;
|
||||
/*
|
||||
* Auxiliary error code. The sub-command may return TDX SEAMCALL
|
||||
* status code in addition to -Exxx.
|
||||
* Defined for consistency with struct kvm_sev_cmd.
|
||||
*/
|
||||
__u64 error;
|
||||
};
|
||||
|
||||
#define KVM_TDX_CPUID_NO_SUBLEAF ((__u32)-1)
|
||||
|
||||
struct kvm_tdx_cpuid_config {
|
||||
__u32 leaf;
|
||||
__u32 sub_leaf;
|
||||
__u32 eax;
|
||||
__u32 ebx;
|
||||
__u32 ecx;
|
||||
__u32 edx;
|
||||
};
|
||||
|
||||
/* supported_gpaw */
|
||||
#define TDX_CAP_GPAW_48 (1 << 0)
|
||||
#define TDX_CAP_GPAW_52 (1 << 1)
|
||||
|
||||
struct kvm_tdx_capabilities {
|
||||
__u64 attrs_fixed0;
|
||||
__u64 attrs_fixed1;
|
||||
__u64 xfam_fixed0;
|
||||
__u64 xfam_fixed1;
|
||||
__u32 supported_gpaw;
|
||||
__u32 padding;
|
||||
__u64 reserved[251];
|
||||
|
||||
__u32 nr_cpuid_configs;
|
||||
struct kvm_tdx_cpuid_config cpuid_configs[];
|
||||
};
|
||||
|
||||
struct kvm_tdx_init_vm {
|
||||
__u64 attributes;
|
||||
__u64 mrconfigid[6]; /* sha384 digest */
|
||||
__u64 mrowner[6]; /* sha384 digest */
|
||||
__u64 mrownerconfig[6]; /* sha384 digest */
|
||||
/*
|
||||
* For future extensibility to make sizeof(struct kvm_tdx_init_vm) = 8KB.
|
||||
* This should be enough given sizeof(TD_PARAMS) = 1024.
|
||||
* 8KB was chosen given because
|
||||
* sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES(=256) = 8KB.
|
||||
*/
|
||||
__u64 reserved[1004];
|
||||
|
||||
/*
|
||||
* Call KVM_TDX_INIT_VM before vcpu creation, thus before
|
||||
* KVM_SET_CPUID2.
|
||||
* This configuration supersedes KVM_SET_CPUID2s for VCPUs because the
|
||||
* TDX module directly virtualizes those CPUIDs without VMM. The user
|
||||
* space VMM, e.g. qemu, should make KVM_SET_CPUID2 consistent with
|
||||
* those values. If it doesn't, KVM may have wrong idea of vCPUIDs of
|
||||
* the guest, and KVM may wrongly emulate CPUIDs or MSRs that the TDX
|
||||
* module doesn't virtualize.
|
||||
*/
|
||||
struct kvm_cpuid2 cpuid;
|
||||
};
|
||||
|
||||
#endif /* _ASM_X86_KVM_H */
|
||||
|
||||
@@ -447,6 +447,11 @@
|
||||
#define __NR_futex_wake 454
|
||||
#define __NR_futex_wait 455
|
||||
#define __NR_futex_requeue 456
|
||||
#define __NR_statmount 457
|
||||
#define __NR_listmount 458
|
||||
#define __NR_lsm_get_self_attr 459
|
||||
#define __NR_lsm_set_self_attr 460
|
||||
#define __NR_lsm_list_modules 461
|
||||
|
||||
|
||||
#endif /* _ASM_UNISTD_32_H */
|
||||
|
||||
@@ -369,6 +369,11 @@
|
||||
#define __NR_futex_wake 454
|
||||
#define __NR_futex_wait 455
|
||||
#define __NR_futex_requeue 456
|
||||
#define __NR_statmount 457
|
||||
#define __NR_listmount 458
|
||||
#define __NR_lsm_get_self_attr 459
|
||||
#define __NR_lsm_set_self_attr 460
|
||||
#define __NR_lsm_list_modules 461
|
||||
|
||||
|
||||
#endif /* _ASM_UNISTD_64_H */
|
||||
|
||||
@@ -321,6 +321,11 @@
|
||||
#define __NR_futex_wake (__X32_SYSCALL_BIT + 454)
|
||||
#define __NR_futex_wait (__X32_SYSCALL_BIT + 455)
|
||||
#define __NR_futex_requeue (__X32_SYSCALL_BIT + 456)
|
||||
#define __NR_statmount (__X32_SYSCALL_BIT + 457)
|
||||
#define __NR_listmount (__X32_SYSCALL_BIT + 458)
|
||||
#define __NR_lsm_get_self_attr (__X32_SYSCALL_BIT + 459)
|
||||
#define __NR_lsm_set_self_attr (__X32_SYSCALL_BIT + 460)
|
||||
#define __NR_lsm_list_modules (__X32_SYSCALL_BIT + 461)
|
||||
#define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512)
|
||||
#define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513)
|
||||
#define __NR_ioctl (__X32_SYSCALL_BIT + 514)
|
||||
|
||||
@@ -49,6 +49,7 @@ enum {
|
||||
IOMMUFD_CMD_GET_HW_INFO,
|
||||
IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING,
|
||||
IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP,
|
||||
IOMMUFD_CMD_HWPT_INVALIDATE,
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -613,4 +614,82 @@ struct iommu_hwpt_get_dirty_bitmap {
|
||||
#define IOMMU_HWPT_GET_DIRTY_BITMAP _IO(IOMMUFD_TYPE, \
|
||||
IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP)
|
||||
|
||||
/**
|
||||
* enum iommu_hwpt_invalidate_data_type - IOMMU HWPT Cache Invalidation
|
||||
* Data Type
|
||||
* @IOMMU_HWPT_INVALIDATE_DATA_VTD_S1: Invalidation data for VTD_S1
|
||||
*/
|
||||
enum iommu_hwpt_invalidate_data_type {
|
||||
IOMMU_HWPT_INVALIDATE_DATA_VTD_S1,
|
||||
};
|
||||
|
||||
/**
|
||||
* enum iommu_hwpt_vtd_s1_invalidate_flags - Flags for Intel VT-d
|
||||
* stage-1 cache invalidation
|
||||
* @IOMMU_VTD_INV_FLAGS_LEAF: Indicates whether the invalidation applies
|
||||
* to all-levels page structure cache or just
|
||||
* the leaf PTE cache.
|
||||
*/
|
||||
enum iommu_hwpt_vtd_s1_invalidate_flags {
|
||||
IOMMU_VTD_INV_FLAGS_LEAF = 1 << 0,
|
||||
};
|
||||
|
||||
/**
|
||||
* struct iommu_hwpt_vtd_s1_invalidate - Intel VT-d cache invalidation
|
||||
* (IOMMU_HWPT_INVALIDATE_DATA_VTD_S1)
|
||||
* @addr: The start address of the range to be invalidated. It needs to
|
||||
* be 4KB aligned.
|
||||
* @npages: Number of contiguous 4K pages to be invalidated.
|
||||
* @flags: Combination of enum iommu_hwpt_vtd_s1_invalidate_flags
|
||||
* @__reserved: Must be 0
|
||||
*
|
||||
* The Intel VT-d specific invalidation data for user-managed stage-1 cache
|
||||
* invalidation in nested translation. Userspace uses this structure to
|
||||
* tell the impacted cache scope after modifying the stage-1 page table.
|
||||
*
|
||||
* Invalidating all the caches related to the page table by setting @addr
|
||||
* to be 0 and @npages to be U64_MAX.
|
||||
*
|
||||
* The device TLB will be invalidated automatically if ATS is enabled.
|
||||
*/
|
||||
struct iommu_hwpt_vtd_s1_invalidate {
|
||||
__aligned_u64 addr;
|
||||
__aligned_u64 npages;
|
||||
__u32 flags;
|
||||
__u32 __reserved;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE)
|
||||
* @size: sizeof(struct iommu_hwpt_invalidate)
|
||||
* @hwpt_id: ID of a nested HWPT for cache invalidation
|
||||
* @data_uptr: User pointer to an array of driver-specific cache invalidation
|
||||
* data.
|
||||
* @data_type: One of enum iommu_hwpt_invalidate_data_type, defining the data
|
||||
* type of all the entries in the invalidation request array. It
|
||||
* should be a type supported by the hwpt pointed by @hwpt_id.
|
||||
* @entry_len: Length (in bytes) of a request entry in the request array
|
||||
* @entry_num: Input the number of cache invalidation requests in the array.
|
||||
* Output the number of requests successfully handled by kernel.
|
||||
* @__reserved: Must be 0.
|
||||
*
|
||||
* Invalidate the iommu cache for user-managed page table. Modifications on a
|
||||
* user-managed page table should be followed by this operation to sync cache.
|
||||
* Each ioctl can support one or more cache invalidation requests in the array
|
||||
* that has a total size of @entry_len * @entry_num.
|
||||
*
|
||||
* An empty invalidation request array by setting @entry_num==0 is allowed, and
|
||||
* @entry_len and @data_uptr would be ignored in this case. This can be used to
|
||||
* check if the given @data_type is supported or not by kernel.
|
||||
*/
|
||||
struct iommu_hwpt_invalidate {
|
||||
__u32 size;
|
||||
__u32 hwpt_id;
|
||||
__aligned_u64 data_uptr;
|
||||
__u32 data_type;
|
||||
__u32 entry_len;
|
||||
__u32 entry_num;
|
||||
__u32 __reserved;
|
||||
};
|
||||
#define IOMMU_HWPT_INVALIDATE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_INVALIDATE)
|
||||
#endif
|
||||
|
||||
@@ -16,76 +16,6 @@
|
||||
|
||||
#define KVM_API_VERSION 12
|
||||
|
||||
/* *** Deprecated interfaces *** */
|
||||
|
||||
#define KVM_TRC_SHIFT 16
|
||||
|
||||
#define KVM_TRC_ENTRYEXIT (1 << KVM_TRC_SHIFT)
|
||||
#define KVM_TRC_HANDLER (1 << (KVM_TRC_SHIFT + 1))
|
||||
|
||||
#define KVM_TRC_VMENTRY (KVM_TRC_ENTRYEXIT + 0x01)
|
||||
#define KVM_TRC_VMEXIT (KVM_TRC_ENTRYEXIT + 0x02)
|
||||
#define KVM_TRC_PAGE_FAULT (KVM_TRC_HANDLER + 0x01)
|
||||
|
||||
#define KVM_TRC_HEAD_SIZE 12
|
||||
#define KVM_TRC_CYCLE_SIZE 8
|
||||
#define KVM_TRC_EXTRA_MAX 7
|
||||
|
||||
#define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02)
|
||||
#define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03)
|
||||
#define KVM_TRC_PEND_INTR (KVM_TRC_HANDLER + 0x04)
|
||||
#define KVM_TRC_IO_READ (KVM_TRC_HANDLER + 0x05)
|
||||
#define KVM_TRC_IO_WRITE (KVM_TRC_HANDLER + 0x06)
|
||||
#define KVM_TRC_CR_READ (KVM_TRC_HANDLER + 0x07)
|
||||
#define KVM_TRC_CR_WRITE (KVM_TRC_HANDLER + 0x08)
|
||||
#define KVM_TRC_DR_READ (KVM_TRC_HANDLER + 0x09)
|
||||
#define KVM_TRC_DR_WRITE (KVM_TRC_HANDLER + 0x0A)
|
||||
#define KVM_TRC_MSR_READ (KVM_TRC_HANDLER + 0x0B)
|
||||
#define KVM_TRC_MSR_WRITE (KVM_TRC_HANDLER + 0x0C)
|
||||
#define KVM_TRC_CPUID (KVM_TRC_HANDLER + 0x0D)
|
||||
#define KVM_TRC_INTR (KVM_TRC_HANDLER + 0x0E)
|
||||
#define KVM_TRC_NMI (KVM_TRC_HANDLER + 0x0F)
|
||||
#define KVM_TRC_VMMCALL (KVM_TRC_HANDLER + 0x10)
|
||||
#define KVM_TRC_HLT (KVM_TRC_HANDLER + 0x11)
|
||||
#define KVM_TRC_CLTS (KVM_TRC_HANDLER + 0x12)
|
||||
#define KVM_TRC_LMSW (KVM_TRC_HANDLER + 0x13)
|
||||
#define KVM_TRC_APIC_ACCESS (KVM_TRC_HANDLER + 0x14)
|
||||
#define KVM_TRC_TDP_FAULT (KVM_TRC_HANDLER + 0x15)
|
||||
#define KVM_TRC_GTLB_WRITE (KVM_TRC_HANDLER + 0x16)
|
||||
#define KVM_TRC_STLB_WRITE (KVM_TRC_HANDLER + 0x17)
|
||||
#define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18)
|
||||
#define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19)
|
||||
|
||||
struct kvm_user_trace_setup {
|
||||
__u32 buf_size;
|
||||
__u32 buf_nr;
|
||||
};
|
||||
|
||||
#define __KVM_DEPRECATED_MAIN_W_0x06 \
|
||||
_IOW(KVMIO, 0x06, struct kvm_user_trace_setup)
|
||||
#define __KVM_DEPRECATED_MAIN_0x07 _IO(KVMIO, 0x07)
|
||||
#define __KVM_DEPRECATED_MAIN_0x08 _IO(KVMIO, 0x08)
|
||||
|
||||
#define __KVM_DEPRECATED_VM_R_0x70 _IOR(KVMIO, 0x70, struct kvm_assigned_irq)
|
||||
|
||||
struct kvm_breakpoint {
|
||||
__u32 enabled;
|
||||
__u32 padding;
|
||||
__u64 address;
|
||||
};
|
||||
|
||||
struct kvm_debug_guest {
|
||||
__u32 enabled;
|
||||
__u32 pad;
|
||||
struct kvm_breakpoint breakpoints[4];
|
||||
__u32 singlestep;
|
||||
};
|
||||
|
||||
#define __KVM_DEPRECATED_VCPU_W_0x87 _IOW(KVMIO, 0x87, struct kvm_debug_guest)
|
||||
|
||||
/* *** End of deprecated interfaces *** */
|
||||
|
||||
|
||||
/* for KVM_SET_USER_MEMORY_REGION */
|
||||
struct kvm_userspace_memory_region {
|
||||
__u32 slot;
|
||||
@@ -95,6 +25,19 @@ struct kvm_userspace_memory_region {
|
||||
__u64 userspace_addr; /* start of the userspace allocated memory */
|
||||
};
|
||||
|
||||
/* for KVM_SET_USER_MEMORY_REGION2 */
|
||||
struct kvm_userspace_memory_region2 {
|
||||
__u32 slot;
|
||||
__u32 flags;
|
||||
__u64 guest_phys_addr;
|
||||
__u64 memory_size;
|
||||
__u64 userspace_addr;
|
||||
__u64 guest_memfd_offset;
|
||||
__u32 guest_memfd;
|
||||
__u32 pad1;
|
||||
__u64 pad2[14];
|
||||
};
|
||||
|
||||
/*
|
||||
* The bit 0 ~ bit 15 of kvm_userspace_memory_region::flags are visible for
|
||||
* userspace, other bits are reserved for kvm internal use which are defined
|
||||
@@ -102,6 +45,7 @@ struct kvm_userspace_memory_region {
|
||||
*/
|
||||
#define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0)
|
||||
#define KVM_MEM_READONLY (1UL << 1)
|
||||
#define KVM_MEM_GUEST_MEMFD (1UL << 2)
|
||||
|
||||
/* for KVM_IRQ_LINE */
|
||||
struct kvm_irq_level {
|
||||
@@ -223,6 +167,92 @@ struct kvm_xen_exit {
|
||||
} u;
|
||||
};
|
||||
|
||||
/* masks for reg_mask to indicate which registers are passed. */
|
||||
#define TDX_VMCALL_REG_MASK_RBX BIT_ULL(2)
|
||||
#define TDX_VMCALL_REG_MASK_RDX BIT_ULL(3)
|
||||
#define TDX_VMCALL_REG_MASK_RSI BIT_ULL(6)
|
||||
#define TDX_VMCALL_REG_MASK_RDI BIT_ULL(7)
|
||||
#define TDX_VMCALL_REG_MASK_R8 BIT_ULL(8)
|
||||
#define TDX_VMCALL_REG_MASK_R9 BIT_ULL(9)
|
||||
#define TDX_VMCALL_REG_MASK_R10 BIT_ULL(10)
|
||||
#define TDX_VMCALL_REG_MASK_R11 BIT_ULL(11)
|
||||
#define TDX_VMCALL_REG_MASK_R12 BIT_ULL(12)
|
||||
#define TDX_VMCALL_REG_MASK_R13 BIT_ULL(13)
|
||||
#define TDX_VMCALL_REG_MASK_R14 BIT_ULL(14)
|
||||
#define TDX_VMCALL_REG_MASK_R15 BIT_ULL(15)
|
||||
|
||||
struct kvm_tdx_exit {
|
||||
#define KVM_EXIT_TDX_VMCALL 1
|
||||
__u32 type;
|
||||
__u32 pad;
|
||||
|
||||
union {
|
||||
struct kvm_tdx_vmcall {
|
||||
/*
|
||||
* RAX(bit 0), RCX(bit 1) and RSP(bit 4) are reserved.
|
||||
* RAX(bit 0): TDG.VP.VMCALL status code.
|
||||
* RCX(bit 1): bitmap for used registers.
|
||||
* RSP(bit 4): the caller stack.
|
||||
*/
|
||||
union {
|
||||
__u64 in_rcx;
|
||||
__u64 reg_mask;
|
||||
};
|
||||
|
||||
/*
|
||||
* Guest-Host-Communication Interface for TDX spec
|
||||
* defines the ABI for TDG.VP.VMCALL.
|
||||
*/
|
||||
/* Input parameters: guest -> VMM */
|
||||
union {
|
||||
__u64 in_r10;
|
||||
__u64 type;
|
||||
};
|
||||
union {
|
||||
__u64 in_r11;
|
||||
__u64 subfunction;
|
||||
};
|
||||
/*
|
||||
* Subfunction specific.
|
||||
* Registers are used in this order to pass input
|
||||
* arguments. r12=arg0, r13=arg1, etc.
|
||||
*/
|
||||
__u64 in_r12;
|
||||
__u64 in_r13;
|
||||
__u64 in_r14;
|
||||
__u64 in_r15;
|
||||
__u64 in_rbx;
|
||||
__u64 in_rdi;
|
||||
__u64 in_rsi;
|
||||
__u64 in_r8;
|
||||
__u64 in_r9;
|
||||
__u64 in_rdx;
|
||||
|
||||
/* Output parameters: VMM -> guest */
|
||||
union {
|
||||
__u64 out_r10;
|
||||
__u64 status_code;
|
||||
};
|
||||
/*
|
||||
* Subfunction specific.
|
||||
* Registers are used in this order to output return
|
||||
* values. r11=ret0, r12=ret1, etc.
|
||||
*/
|
||||
__u64 out_r11;
|
||||
__u64 out_r12;
|
||||
__u64 out_r13;
|
||||
__u64 out_r14;
|
||||
__u64 out_r15;
|
||||
__u64 out_rbx;
|
||||
__u64 out_rdi;
|
||||
__u64 out_rsi;
|
||||
__u64 out_r8;
|
||||
__u64 out_r9;
|
||||
__u64 out_rdx;
|
||||
} vmcall;
|
||||
} u;
|
||||
};
|
||||
|
||||
#define KVM_S390_GET_SKEYS_NONE 1
|
||||
#define KVM_S390_SKEYS_MAX 1048576
|
||||
|
||||
@@ -265,6 +295,8 @@ struct kvm_xen_exit {
|
||||
#define KVM_EXIT_RISCV_CSR 36
|
||||
#define KVM_EXIT_NOTIFY 37
|
||||
#define KVM_EXIT_LOONGARCH_IOCSR 38
|
||||
#define KVM_EXIT_MEMORY_FAULT 39
|
||||
#define KVM_EXIT_TDX 40
|
||||
|
||||
/* For KVM_EXIT_INTERNAL_ERROR */
|
||||
/* Emulate instruction failed. */
|
||||
@@ -514,6 +546,15 @@ struct kvm_run {
|
||||
#define KVM_NOTIFY_CONTEXT_INVALID (1 << 0)
|
||||
__u32 flags;
|
||||
} notify;
|
||||
/* KVM_EXIT_MEMORY_FAULT */
|
||||
struct {
|
||||
#define KVM_MEMORY_EXIT_FLAG_PRIVATE (1ULL << 3)
|
||||
__u64 flags;
|
||||
__u64 gpa;
|
||||
__u64 size;
|
||||
} memory_fault;
|
||||
/* KVM_EXIT_TDX_VMCALL */
|
||||
struct kvm_tdx_exit tdx;
|
||||
/* Fix the size of the union. */
|
||||
char padding[256];
|
||||
};
|
||||
@@ -941,9 +982,6 @@ struct kvm_ppc_resize_hpt {
|
||||
*/
|
||||
#define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* in bytes */
|
||||
#define KVM_GET_SUPPORTED_CPUID _IOWR(KVMIO, 0x05, struct kvm_cpuid2)
|
||||
#define KVM_TRACE_ENABLE __KVM_DEPRECATED_MAIN_W_0x06
|
||||
#define KVM_TRACE_PAUSE __KVM_DEPRECATED_MAIN_0x07
|
||||
#define KVM_TRACE_DISABLE __KVM_DEPRECATED_MAIN_0x08
|
||||
#define KVM_GET_EMULATED_CPUID _IOWR(KVMIO, 0x09, struct kvm_cpuid2)
|
||||
#define KVM_GET_MSR_FEATURE_INDEX_LIST _IOWR(KVMIO, 0x0a, struct kvm_msr_list)
|
||||
|
||||
@@ -1197,6 +1235,13 @@ struct kvm_ppc_resize_hpt {
|
||||
#define KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE 228
|
||||
#define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229
|
||||
#define KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES 230
|
||||
#define KVM_CAP_USER_MEMORY2 231
|
||||
#define KVM_CAP_MEMORY_FAULT_INFO 232
|
||||
#define KVM_CAP_MEMORY_ATTRIBUTES 233
|
||||
#define KVM_CAP_GUEST_MEMFD 234
|
||||
#define KVM_CAP_VM_TYPES 235
|
||||
#define KVM_CAP_MEMORY_MAPPING 236
|
||||
#define KVM_CAP_X86_BUS_FREQUENCY_CONTROL 237
|
||||
|
||||
#ifdef KVM_CAP_IRQ_ROUTING
|
||||
|
||||
@@ -1287,6 +1332,7 @@ struct kvm_x86_mce {
|
||||
#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4)
|
||||
#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5)
|
||||
#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6)
|
||||
#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7)
|
||||
|
||||
struct kvm_xen_hvm_config {
|
||||
__u32 flags;
|
||||
@@ -1479,6 +1525,8 @@ struct kvm_vfio_spapr_tce {
|
||||
struct kvm_userspace_memory_region)
|
||||
#define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47)
|
||||
#define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64)
|
||||
#define KVM_SET_USER_MEMORY_REGION2 _IOW(KVMIO, 0x49, \
|
||||
struct kvm_userspace_memory_region2)
|
||||
|
||||
/* enable ucontrol for s390 */
|
||||
struct kvm_s390_ucas_mapping {
|
||||
@@ -1503,20 +1551,8 @@ struct kvm_s390_ucas_mapping {
|
||||
_IOW(KVMIO, 0x67, struct kvm_coalesced_mmio_zone)
|
||||
#define KVM_UNREGISTER_COALESCED_MMIO \
|
||||
_IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone)
|
||||
#define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \
|
||||
struct kvm_assigned_pci_dev)
|
||||
#define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing)
|
||||
/* deprecated, replaced by KVM_ASSIGN_DEV_IRQ */
|
||||
#define KVM_ASSIGN_IRQ __KVM_DEPRECATED_VM_R_0x70
|
||||
#define KVM_ASSIGN_DEV_IRQ _IOW(KVMIO, 0x70, struct kvm_assigned_irq)
|
||||
#define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71)
|
||||
#define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \
|
||||
struct kvm_assigned_pci_dev)
|
||||
#define KVM_ASSIGN_SET_MSIX_NR _IOW(KVMIO, 0x73, \
|
||||
struct kvm_assigned_msix_nr)
|
||||
#define KVM_ASSIGN_SET_MSIX_ENTRY _IOW(KVMIO, 0x74, \
|
||||
struct kvm_assigned_msix_entry)
|
||||
#define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq)
|
||||
#define KVM_IRQFD _IOW(KVMIO, 0x76, struct kvm_irqfd)
|
||||
#define KVM_CREATE_PIT2 _IOW(KVMIO, 0x77, struct kvm_pit_config)
|
||||
#define KVM_SET_BOOT_CPU_ID _IO(KVMIO, 0x78)
|
||||
@@ -1533,9 +1569,6 @@ struct kvm_s390_ucas_mapping {
|
||||
* KVM_CAP_VM_TSC_CONTROL to set defaults for a VM */
|
||||
#define KVM_SET_TSC_KHZ _IO(KVMIO, 0xa2)
|
||||
#define KVM_GET_TSC_KHZ _IO(KVMIO, 0xa3)
|
||||
/* Available with KVM_CAP_PCI_2_3 */
|
||||
#define KVM_ASSIGN_SET_INTX_MASK _IOW(KVMIO, 0xa4, \
|
||||
struct kvm_assigned_pci_dev)
|
||||
/* Available with KVM_CAP_SIGNAL_MSI */
|
||||
#define KVM_SIGNAL_MSI _IOW(KVMIO, 0xa5, struct kvm_msi)
|
||||
/* Available with KVM_CAP_PPC_GET_SMMU_INFO */
|
||||
@@ -1588,8 +1621,6 @@ struct kvm_s390_ucas_mapping {
|
||||
#define KVM_SET_SREGS _IOW(KVMIO, 0x84, struct kvm_sregs)
|
||||
#define KVM_TRANSLATE _IOWR(KVMIO, 0x85, struct kvm_translation)
|
||||
#define KVM_INTERRUPT _IOW(KVMIO, 0x86, struct kvm_interrupt)
|
||||
/* KVM_DEBUG_GUEST is no longer supported, use KVM_SET_GUEST_DEBUG instead */
|
||||
#define KVM_DEBUG_GUEST __KVM_DEPRECATED_VCPU_W_0x87
|
||||
#define KVM_GET_MSRS _IOWR(KVMIO, 0x88, struct kvm_msrs)
|
||||
#define KVM_SET_MSRS _IOW(KVMIO, 0x89, struct kvm_msrs)
|
||||
#define KVM_SET_CPUID _IOW(KVMIO, 0x8a, struct kvm_cpuid)
|
||||
@@ -2263,4 +2294,33 @@ struct kvm_s390_zpci_op {
|
||||
/* flags for kvm_s390_zpci_op->u.reg_aen.flags */
|
||||
#define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0)
|
||||
|
||||
/* Available with KVM_CAP_MEMORY_ATTRIBUTES */
|
||||
#define KVM_SET_MEMORY_ATTRIBUTES _IOW(KVMIO, 0xd2, struct kvm_memory_attributes)
|
||||
|
||||
struct kvm_memory_attributes {
|
||||
__u64 address;
|
||||
__u64 size;
|
||||
__u64 attributes;
|
||||
__u64 flags;
|
||||
};
|
||||
|
||||
#define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3)
|
||||
|
||||
#define KVM_CREATE_GUEST_MEMFD _IOWR(KVMIO, 0xd4, struct kvm_create_guest_memfd)
|
||||
|
||||
struct kvm_create_guest_memfd {
|
||||
__u64 size;
|
||||
__u64 flags;
|
||||
__u64 reserved[6];
|
||||
};
|
||||
|
||||
#define KVM_MEMORY_MAPPING _IOWR(KVMIO, 0xd5, struct kvm_memory_mapping)
|
||||
|
||||
struct kvm_memory_mapping {
|
||||
__u64 base_gfn;
|
||||
__u64 nr_pages;
|
||||
__u64 flags;
|
||||
__u64 source;
|
||||
};
|
||||
|
||||
#endif /* __LINUX_KVM_H */
|
||||
|
||||
@@ -41,7 +41,8 @@
|
||||
UFFD_FEATURE_WP_HUGETLBFS_SHMEM | \
|
||||
UFFD_FEATURE_WP_UNPOPULATED | \
|
||||
UFFD_FEATURE_POISON | \
|
||||
UFFD_FEATURE_WP_ASYNC)
|
||||
UFFD_FEATURE_WP_ASYNC | \
|
||||
UFFD_FEATURE_MOVE)
|
||||
#define UFFD_API_IOCTLS \
|
||||
((__u64)1 << _UFFDIO_REGISTER | \
|
||||
(__u64)1 << _UFFDIO_UNREGISTER | \
|
||||
@@ -50,6 +51,7 @@
|
||||
((__u64)1 << _UFFDIO_WAKE | \
|
||||
(__u64)1 << _UFFDIO_COPY | \
|
||||
(__u64)1 << _UFFDIO_ZEROPAGE | \
|
||||
(__u64)1 << _UFFDIO_MOVE | \
|
||||
(__u64)1 << _UFFDIO_WRITEPROTECT | \
|
||||
(__u64)1 << _UFFDIO_CONTINUE | \
|
||||
(__u64)1 << _UFFDIO_POISON)
|
||||
@@ -73,6 +75,7 @@
|
||||
#define _UFFDIO_WAKE (0x02)
|
||||
#define _UFFDIO_COPY (0x03)
|
||||
#define _UFFDIO_ZEROPAGE (0x04)
|
||||
#define _UFFDIO_MOVE (0x05)
|
||||
#define _UFFDIO_WRITEPROTECT (0x06)
|
||||
#define _UFFDIO_CONTINUE (0x07)
|
||||
#define _UFFDIO_POISON (0x08)
|
||||
@@ -92,6 +95,8 @@
|
||||
struct uffdio_copy)
|
||||
#define UFFDIO_ZEROPAGE _IOWR(UFFDIO, _UFFDIO_ZEROPAGE, \
|
||||
struct uffdio_zeropage)
|
||||
#define UFFDIO_MOVE _IOWR(UFFDIO, _UFFDIO_MOVE, \
|
||||
struct uffdio_move)
|
||||
#define UFFDIO_WRITEPROTECT _IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \
|
||||
struct uffdio_writeprotect)
|
||||
#define UFFDIO_CONTINUE _IOWR(UFFDIO, _UFFDIO_CONTINUE, \
|
||||
@@ -222,6 +227,9 @@ struct uffdio_api {
|
||||
* asynchronous mode is supported in which the write fault is
|
||||
* automatically resolved and write-protection is un-set.
|
||||
* It implies UFFD_FEATURE_WP_UNPOPULATED.
|
||||
*
|
||||
* UFFD_FEATURE_MOVE indicates that the kernel supports moving an
|
||||
* existing page contents from userspace.
|
||||
*/
|
||||
#define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0)
|
||||
#define UFFD_FEATURE_EVENT_FORK (1<<1)
|
||||
@@ -239,6 +247,7 @@ struct uffdio_api {
|
||||
#define UFFD_FEATURE_WP_UNPOPULATED (1<<13)
|
||||
#define UFFD_FEATURE_POISON (1<<14)
|
||||
#define UFFD_FEATURE_WP_ASYNC (1<<15)
|
||||
#define UFFD_FEATURE_MOVE (1<<16)
|
||||
__u64 features;
|
||||
|
||||
__u64 ioctls;
|
||||
@@ -347,6 +356,24 @@ struct uffdio_poison {
|
||||
__s64 updated;
|
||||
};
|
||||
|
||||
struct uffdio_move {
|
||||
__u64 dst;
|
||||
__u64 src;
|
||||
__u64 len;
|
||||
/*
|
||||
* Especially if used to atomically remove memory from the
|
||||
* address space the wake on the dst range is not needed.
|
||||
*/
|
||||
#define UFFDIO_MOVE_MODE_DONTWAKE ((__u64)1<<0)
|
||||
#define UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES ((__u64)1<<1)
|
||||
__u64 mode;
|
||||
/*
|
||||
* "move" is written by the ioctl and must be at the end: the
|
||||
* copy_from_user will not read the last 8 bytes.
|
||||
*/
|
||||
__s64 move;
|
||||
};
|
||||
|
||||
/*
|
||||
* Flags for the userfaultfd(2) system call itself.
|
||||
*/
|
||||
|
||||
@@ -1219,6 +1219,7 @@ enum vfio_device_mig_state {
|
||||
VFIO_DEVICE_STATE_RUNNING_P2P = 5,
|
||||
VFIO_DEVICE_STATE_PRE_COPY = 6,
|
||||
VFIO_DEVICE_STATE_PRE_COPY_P2P = 7,
|
||||
VFIO_DEVICE_STATE_NR,
|
||||
};
|
||||
|
||||
/**
|
||||
|
||||
@@ -895,6 +895,42 @@
|
||||
'reduced-phys-bits': 'uint32',
|
||||
'*kernel-hashes': 'bool' } }
|
||||
|
||||
##
|
||||
# @TdxGuestProperties:
|
||||
#
|
||||
# Properties for tdx-guest objects.
|
||||
#
|
||||
# @sept-ve-disable: toggle bit 28 of TD attributes to control disabling
|
||||
# of EPT violation conversion to #VE on guest TD access of PENDING
|
||||
# pages. Some guest OS (e.g., Linux TD guest) may require this to
|
||||
# be set, otherwise they refuse to boot.
|
||||
#
|
||||
# @mrconfigid: ID for non-owner-defined configuration of the guest TD,
|
||||
# e.g., run-time or OS configuration (base64 encoded SHA384 digest).
|
||||
# (A default value 0 of SHA384 is used when absent).
|
||||
#
|
||||
# @mrowner: ID for the guest TD’s owner (base64 encoded SHA384 digest).
|
||||
# (A default value 0 of SHA384 is used when absent).
|
||||
#
|
||||
# @mrownerconfig: ID for owner-defined configuration of the guest TD,
|
||||
# e.g., specific to the workload rather than the run-time or OS
|
||||
# (base64 encoded SHA384 digest). (A default value 0 of SHA384 is
|
||||
# used when absent).
|
||||
#
|
||||
# @quote-generation-socket: socket address for Quote Generation
|
||||
# Service (QGS). QGS is a daemon running on the host. User in
|
||||
# TD guest cannot get TD quoting for attestation if QGS is not
|
||||
# provided. So admin should always provide it.
|
||||
#
|
||||
# Since: 9.0
|
||||
##
|
||||
{ 'struct': 'TdxGuestProperties',
|
||||
'data': { '*sept-ve-disable': 'bool',
|
||||
'*mrconfigid': 'str',
|
||||
'*mrowner': 'str',
|
||||
'*mrownerconfig': 'str',
|
||||
'*quote-generation-socket': 'SocketAddress' } }
|
||||
|
||||
##
|
||||
# @ThreadContextProperties:
|
||||
#
|
||||
@@ -974,6 +1010,7 @@
|
||||
'sev-guest',
|
||||
'thread-context',
|
||||
's390-pv-guest',
|
||||
'tdx-guest',
|
||||
'throttle-group',
|
||||
'tls-creds-anon',
|
||||
'tls-creds-psk',
|
||||
@@ -1041,6 +1078,7 @@
|
||||
'secret_keyring': { 'type': 'SecretKeyringProperties',
|
||||
'if': 'CONFIG_SECRET_KEYRING' },
|
||||
'sev-guest': 'SevGuestProperties',
|
||||
'tdx-guest': 'TdxGuestProperties',
|
||||
'thread-context': 'ThreadContextProperties',
|
||||
'throttle-group': 'ThrottleGroupProperties',
|
||||
'tls-creds-anon': 'TlsCredsAnonProperties',
|
||||
|
||||
@@ -483,10 +483,12 @@
|
||||
#
|
||||
# @s390: s390 guest panic information type (Since: 2.12)
|
||||
#
|
||||
# @tdx: tdx guest panic information type (Since: 9.0)
|
||||
#
|
||||
# Since: 2.9
|
||||
##
|
||||
{ 'enum': 'GuestPanicInformationType',
|
||||
'data': [ 'hyper-v', 's390' ] }
|
||||
'data': [ 'hyper-v', 's390', 'tdx' ] }
|
||||
|
||||
##
|
||||
# @GuestPanicInformation:
|
||||
@@ -501,7 +503,8 @@
|
||||
'base': {'type': 'GuestPanicInformationType'},
|
||||
'discriminator': 'type',
|
||||
'data': {'hyper-v': 'GuestPanicInformationHyperV',
|
||||
's390': 'GuestPanicInformationS390'}}
|
||||
's390': 'GuestPanicInformationS390',
|
||||
'tdx' : 'GuestPanicInformationTdx'}}
|
||||
|
||||
##
|
||||
# @GuestPanicInformationHyperV:
|
||||
@@ -564,6 +567,30 @@
|
||||
'psw-addr': 'uint64',
|
||||
'reason': 'S390CrashReason'}}
|
||||
|
||||
##
|
||||
# @GuestPanicInformationTdx:
|
||||
#
|
||||
# TDX Guest panic information specific to TDX, as specified in the
|
||||
# "Guest-Hypervisor Communication Interface (GHCI) Specification",
|
||||
# section TDG.VP.VMCALL<ReportFatalError>.
|
||||
#
|
||||
# @error-code: TD-specific error code
|
||||
#
|
||||
# @message: Human-readable error message provided by the guest. Not
|
||||
# to be trusted.
|
||||
#
|
||||
# @gpa: guest-physical address of a page that contains more verbose
|
||||
# error information, as zero-terminated string. Present when the
|
||||
# "GPA valid" bit (bit 63) is set in @error-code.
|
||||
#
|
||||
#
|
||||
# Since: 9.0
|
||||
##
|
||||
{'struct': 'GuestPanicInformationTdx',
|
||||
'data': {'error-code': 'uint64',
|
||||
'message': 'str',
|
||||
'*gpa': 'uint64'}}
|
||||
|
||||
##
|
||||
# @MEMORY_FAILURE:
|
||||
#
|
||||
|
||||
@@ -1850,6 +1850,11 @@ bool memory_region_is_protected(MemoryRegion *mr)
|
||||
return mr->ram && (mr->ram_block->flags & RAM_PROTECTED);
|
||||
}
|
||||
|
||||
bool memory_region_has_guest_memfd(MemoryRegion *mr)
|
||||
{
|
||||
return mr->ram_block && mr->ram_block->guest_memfd >= 0;
|
||||
}
|
||||
|
||||
uint8_t memory_region_get_dirty_log_mask(MemoryRegion *mr)
|
||||
{
|
||||
uint8_t mask = mr->dirty_log_mask;
|
||||
@@ -3601,6 +3606,31 @@ bool memory_region_init_ram(MemoryRegion *mr,
|
||||
return true;
|
||||
}
|
||||
|
||||
bool memory_region_init_ram_guest_memfd(MemoryRegion *mr,
|
||||
Object *owner,
|
||||
const char *name,
|
||||
uint64_t size,
|
||||
Error **errp)
|
||||
{
|
||||
DeviceState *owner_dev;
|
||||
|
||||
if (!memory_region_init_ram_flags_nomigrate(mr, owner, name, size,
|
||||
RAM_GUEST_MEMFD, errp)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* This will assert if owner is neither NULL nor a DeviceState.
|
||||
* We only want the owner here for the purposes of defining a
|
||||
* unique name for migration. TODO: Ideally we should implement
|
||||
* a naming scheme for Objects which are not DeviceStates, in
|
||||
* which case we can relax this restriction.
|
||||
*/
|
||||
owner_dev = DEVICE(owner);
|
||||
vmstate_register_ram(mr, owner_dev);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool memory_region_init_rom(MemoryRegion *mr,
|
||||
Object *owner,
|
||||
const char *name,
|
||||
|
||||
@@ -1841,6 +1841,17 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
|
||||
}
|
||||
}
|
||||
|
||||
if (kvm_enabled() && (new_block->flags & RAM_GUEST_MEMFD)) {
|
||||
assert(new_block->guest_memfd < 0);
|
||||
|
||||
new_block->guest_memfd = kvm_create_guest_memfd(new_block->max_length,
|
||||
0, errp);
|
||||
if (new_block->guest_memfd < 0) {
|
||||
qemu_mutex_unlock_ramlist();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
new_ram_size = MAX(old_ram_size,
|
||||
(new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
|
||||
if (new_ram_size > old_ram_size) {
|
||||
@@ -1903,7 +1914,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
|
||||
/* Just support these ram flags by now. */
|
||||
assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE |
|
||||
RAM_PROTECTED | RAM_NAMED_FILE | RAM_READONLY |
|
||||
RAM_READONLY_FD)) == 0);
|
||||
RAM_READONLY_FD | RAM_GUEST_MEMFD)) == 0);
|
||||
|
||||
if (xen_enabled()) {
|
||||
error_setg(errp, "-mem-path not supported with Xen");
|
||||
@@ -1938,6 +1949,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
|
||||
new_block->used_length = size;
|
||||
new_block->max_length = size;
|
||||
new_block->flags = ram_flags;
|
||||
new_block->guest_memfd = -1;
|
||||
new_block->host = file_ram_alloc(new_block, size, fd, !file_size, offset,
|
||||
errp);
|
||||
if (!new_block->host) {
|
||||
@@ -2016,7 +2028,7 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
|
||||
Error *local_err = NULL;
|
||||
|
||||
assert((ram_flags & ~(RAM_SHARED | RAM_RESIZEABLE | RAM_PREALLOC |
|
||||
RAM_NORESERVE)) == 0);
|
||||
RAM_NORESERVE| RAM_GUEST_MEMFD)) == 0);
|
||||
assert(!host ^ (ram_flags & RAM_PREALLOC));
|
||||
|
||||
size = HOST_PAGE_ALIGN(size);
|
||||
@@ -2028,6 +2040,7 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
|
||||
new_block->max_length = max_size;
|
||||
assert(max_size >= size);
|
||||
new_block->fd = -1;
|
||||
new_block->guest_memfd = -1;
|
||||
new_block->page_size = qemu_real_host_page_size();
|
||||
new_block->host = host;
|
||||
new_block->flags = ram_flags;
|
||||
@@ -2050,7 +2063,7 @@ RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
|
||||
RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags,
|
||||
MemoryRegion *mr, Error **errp)
|
||||
{
|
||||
assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE)) == 0);
|
||||
assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE | RAM_GUEST_MEMFD)) == 0);
|
||||
return qemu_ram_alloc_internal(size, size, NULL, NULL, ram_flags, mr, errp);
|
||||
}
|
||||
|
||||
@@ -2078,6 +2091,11 @@ static void reclaim_ramblock(RAMBlock *block)
|
||||
} else {
|
||||
qemu_anon_ram_free(block->host, block->max_length);
|
||||
}
|
||||
|
||||
if (block->guest_memfd >= 0) {
|
||||
close(block->guest_memfd);
|
||||
}
|
||||
|
||||
g_free(block);
|
||||
}
|
||||
|
||||
@@ -3600,6 +3618,29 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start,
|
||||
size_t length)
|
||||
{
|
||||
int ret = -1;
|
||||
|
||||
#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
|
||||
ret = fallocate(rb->guest_memfd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
|
||||
start, length);
|
||||
|
||||
if (ret) {
|
||||
ret = -errno;
|
||||
error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx (%d)",
|
||||
__func__, rb->idstr, start, length, ret);
|
||||
}
|
||||
#else
|
||||
ret = -ENOSYS;
|
||||
error_report("%s: fallocate not available %s:%" PRIx64 " +%zx (%d)",
|
||||
__func__, rb->idstr, start, length, ret);
|
||||
#endif
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool ramblock_is_pmem(RAMBlock *rb)
|
||||
{
|
||||
return rb->flags & RAM_PMEM;
|
||||
|
||||
@@ -519,6 +519,52 @@ static void qemu_system_wakeup(void)
|
||||
}
|
||||
}
|
||||
|
||||
static char* tdx_parse_panic_message(char *message)
|
||||
{
|
||||
bool printable = false;
|
||||
char *buf = NULL;
|
||||
int len = 0, i;
|
||||
|
||||
/*
|
||||
* Although message is defined as a json string, we shouldn't
|
||||
* unconditionally treat it as is because the guest generated it and
|
||||
* it's not necessarily trustable.
|
||||
*/
|
||||
if (message) {
|
||||
/* The caller guarantees the NUL-terminated string. */
|
||||
len = strlen(message);
|
||||
|
||||
printable = len > 0;
|
||||
for (i = 0; i < len; i++) {
|
||||
if (!(0x20 <= message[i] && message[i] <= 0x7e)) {
|
||||
printable = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!printable && len) {
|
||||
/* 3 = length of "%02x " */
|
||||
buf = g_malloc(len * 3);
|
||||
for (i = 0; i < len; i++) {
|
||||
if (message[i] == '\0') {
|
||||
break;
|
||||
} else {
|
||||
sprintf(buf + 3 * i, "%02x ", message[i]);
|
||||
}
|
||||
}
|
||||
if (i > 0)
|
||||
/* replace the last ' '(space) to NUL */
|
||||
buf[i * 3 - 1] = '\0';
|
||||
else
|
||||
buf[0] = '\0';
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
return message;
|
||||
}
|
||||
|
||||
void qemu_system_guest_panicked(GuestPanicInformation *info)
|
||||
{
|
||||
qemu_log_mask(LOG_GUEST_ERROR, "Guest crashed");
|
||||
@@ -560,7 +606,19 @@ void qemu_system_guest_panicked(GuestPanicInformation *info)
|
||||
S390CrashReason_str(info->u.s390.reason),
|
||||
info->u.s390.psw_mask,
|
||||
info->u.s390.psw_addr);
|
||||
} else if (info->type == GUEST_PANIC_INFORMATION_TYPE_TDX) {
|
||||
qemu_log_mask(LOG_GUEST_ERROR,
|
||||
" TDX guest reports fatal error:"
|
||||
" error code: 0x%#" PRIx64 " error message:\"%s\"\n",
|
||||
info->u.tdx.error_code,
|
||||
tdx_parse_panic_message(info->u.tdx.message));
|
||||
if (info->u.tdx.error_code & (1ull << 63)) {
|
||||
qemu_log_mask(LOG_GUEST_ERROR, "Additional error information "
|
||||
"can be found at gpa page: 0x%#" PRIx64 "\n",
|
||||
info->u.tdx.gpa);
|
||||
}
|
||||
}
|
||||
|
||||
qapi_free_GuestPanicInformation(info);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -20,6 +20,15 @@
|
||||
#ifndef I386_CPU_INTERNAL_H
|
||||
#define I386_CPU_INTERNAL_H
|
||||
|
||||
typedef struct FeatureMask {
|
||||
FeatureWord index;
|
||||
uint64_t mask;
|
||||
} FeatureMask;
|
||||
|
||||
typedef struct FeatureDep {
|
||||
FeatureMask from, to;
|
||||
} FeatureDep;
|
||||
|
||||
typedef enum FeatureWordType {
|
||||
CPUID_FEATURE_WORD,
|
||||
MSR_FEATURE_WORD,
|
||||
|
||||
@@ -1443,15 +1443,6 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
|
||||
},
|
||||
};
|
||||
|
||||
typedef struct FeatureMask {
|
||||
FeatureWord index;
|
||||
uint64_t mask;
|
||||
} FeatureMask;
|
||||
|
||||
typedef struct FeatureDep {
|
||||
FeatureMask from, to;
|
||||
} FeatureDep;
|
||||
|
||||
static FeatureDep feature_dependencies[] = {
|
||||
{
|
||||
.from = { FEAT_7_0_EDX, CPUID_7_0_EDX_ARCH_CAPABILITIES },
|
||||
@@ -1576,9 +1567,6 @@ static const X86RegisterInfo32 x86_reg_info_32[CPU_NB_REGS32] = {
|
||||
};
|
||||
#undef REGISTER
|
||||
|
||||
/* CPUID feature bits available in XSS */
|
||||
#define CPUID_XSTATE_XSS_MASK (XSTATE_ARCH_LBR_MASK)
|
||||
|
||||
ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT] = {
|
||||
[XSTATE_FP_BIT] = {
|
||||
/* x87 FP state component is always enabled if XSAVE is supported */
|
||||
|
||||
@@ -595,6 +595,9 @@ typedef enum X86Seg {
|
||||
XSTATE_Hi16_ZMM_MASK | XSTATE_PKRU_MASK | \
|
||||
XSTATE_XTILE_CFG_MASK | XSTATE_XTILE_DATA_MASK)
|
||||
|
||||
/* CPUID feature bits available in XSS */
|
||||
#define CPUID_XSTATE_XSS_MASK (XSTATE_ARCH_LBR_MASK)
|
||||
|
||||
/* CPUID feature words */
|
||||
typedef enum FeatureWord {
|
||||
FEAT_1_EDX, /* CPUID[1].EDX */
|
||||
@@ -787,6 +790,8 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
|
||||
|
||||
/* Support RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE */
|
||||
#define CPUID_7_0_EBX_FSGSBASE (1U << 0)
|
||||
/* Support for TSC adjustment MSR 0x3B */
|
||||
#define CPUID_7_0_EBX_TSC_ADJUST (1U << 1)
|
||||
/* Support SGX */
|
||||
#define CPUID_7_0_EBX_SGX (1U << 2)
|
||||
/* 1st Group of Advanced Bit Manipulation Extensions */
|
||||
@@ -805,8 +810,12 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
|
||||
#define CPUID_7_0_EBX_INVPCID (1U << 10)
|
||||
/* Restricted Transactional Memory */
|
||||
#define CPUID_7_0_EBX_RTM (1U << 11)
|
||||
/* Cache QoS Monitoring */
|
||||
#define CPUID_7_0_EBX_PQM (1U << 12)
|
||||
/* Memory Protection Extension */
|
||||
#define CPUID_7_0_EBX_MPX (1U << 14)
|
||||
/* Resource Director Technology Allocation */
|
||||
#define CPUID_7_0_EBX_RDT_A (1U << 15)
|
||||
/* AVX-512 Foundation */
|
||||
#define CPUID_7_0_EBX_AVX512F (1U << 16)
|
||||
/* AVX-512 Doubleword & Quadword Instruction */
|
||||
@@ -862,12 +871,20 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
|
||||
#define CPUID_7_0_ECX_AVX512VNNI (1U << 11)
|
||||
/* Support for VPOPCNT[B,W] and VPSHUFBITQMB */
|
||||
#define CPUID_7_0_ECX_AVX512BITALG (1U << 12)
|
||||
/* Intel Total Memory Encryption */
|
||||
#define CPUID_7_0_ECX_TME (1U << 13)
|
||||
/* POPCNT for vectors of DW/QW */
|
||||
#define CPUID_7_0_ECX_AVX512_VPOPCNTDQ (1U << 14)
|
||||
/* Placeholder for bit 15 */
|
||||
#define CPUID_7_0_ECX_FZM (1U << 15)
|
||||
/* 5-level Page Tables */
|
||||
#define CPUID_7_0_ECX_LA57 (1U << 16)
|
||||
/* MAWAU for MPX */
|
||||
#define CPUID_7_0_ECX_MAWAU (31U << 17)
|
||||
/* Read Processor ID */
|
||||
#define CPUID_7_0_ECX_RDPID (1U << 22)
|
||||
/* KeyLocker */
|
||||
#define CPUID_7_0_ECX_KeyLocker (1U << 23)
|
||||
/* Bus Lock Debug Exception */
|
||||
#define CPUID_7_0_ECX_BUS_LOCK_DETECT (1U << 24)
|
||||
/* Cache Line Demote Instruction */
|
||||
@@ -876,6 +893,8 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
|
||||
#define CPUID_7_0_ECX_MOVDIRI (1U << 27)
|
||||
/* Move 64 Bytes as Direct Store Instruction */
|
||||
#define CPUID_7_0_ECX_MOVDIR64B (1U << 28)
|
||||
/* ENQCMD and ENQCMDS instructions */
|
||||
#define CPUID_7_0_ECX_ENQCMD (1U << 29)
|
||||
/* Support SGX Launch Control */
|
||||
#define CPUID_7_0_ECX_SGX_LC (1U << 30)
|
||||
/* Protection Keys for Supervisor-mode Pages */
|
||||
@@ -893,6 +912,8 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
|
||||
#define CPUID_7_0_EDX_SERIALIZE (1U << 14)
|
||||
/* TSX Suspend Load Address Tracking instruction */
|
||||
#define CPUID_7_0_EDX_TSX_LDTRK (1U << 16)
|
||||
/* PCONFIG instruction */
|
||||
#define CPUID_7_0_EDX_PCONFIG (1U << 18)
|
||||
/* Architectural LBRs */
|
||||
#define CPUID_7_0_EDX_ARCH_LBR (1U << 19)
|
||||
/* AMX_BF16 instruction */
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
#include "sysemu/sysemu.h"
|
||||
#include "hw/boards.h"
|
||||
|
||||
#include "tdx.h"
|
||||
#include "kvm_i386.h"
|
||||
#include "hw/core/accel-cpu.h"
|
||||
|
||||
@@ -60,6 +61,10 @@ static bool lmce_supported(void)
|
||||
if (kvm_ioctl(kvm_state, KVM_X86_GET_MCE_CAP_SUPPORTED, &mce_cap) < 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (is_tdx_vm())
|
||||
return false;
|
||||
|
||||
return !!(mce_cap & MCG_LMCE_P);
|
||||
}
|
||||
|
||||
|
||||
@@ -32,6 +32,7 @@
|
||||
#include "sysemu/runstate.h"
|
||||
#include "kvm_i386.h"
|
||||
#include "sev.h"
|
||||
#include "tdx.h"
|
||||
#include "xen-emu.h"
|
||||
#include "hyperv.h"
|
||||
#include "hyperv-proto.h"
|
||||
@@ -161,6 +162,35 @@ static KVMMSRHandlers msr_handlers[KVM_MSR_FILTER_MAX_RANGES];
|
||||
static RateLimit bus_lock_ratelimit_ctrl;
|
||||
static int kvm_get_one_msr(X86CPU *cpu, int index, uint64_t *value);
|
||||
|
||||
static const char *vm_type_name[] = {
|
||||
[KVM_X86_DEFAULT_VM] = "default",
|
||||
[KVM_X86_TDX_VM] = "tdx",
|
||||
};
|
||||
|
||||
int kvm_get_vm_type(MachineState *ms, const char *vm_type)
|
||||
{
|
||||
int kvm_type = KVM_X86_DEFAULT_VM;
|
||||
|
||||
if (ms->cgs && object_dynamic_cast(OBJECT(ms->cgs), TYPE_TDX_GUEST)) {
|
||||
kvm_type = KVM_X86_TDX_VM;
|
||||
}
|
||||
|
||||
/*
|
||||
* old KVM doesn't support KVM_CAP_VM_TYPES and KVM_X86_DEFAULT_VM
|
||||
* is always supported
|
||||
*/
|
||||
if (kvm_type == KVM_X86_DEFAULT_VM) {
|
||||
return kvm_type;
|
||||
}
|
||||
|
||||
if (!(kvm_check_extension(KVM_STATE(ms->accelerator), KVM_CAP_VM_TYPES) & BIT(kvm_type))) {
|
||||
error_report("vm-type %s not supported by KVM", vm_type_name[kvm_type]);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
return kvm_type;
|
||||
}
|
||||
|
||||
bool kvm_has_smm(void)
|
||||
{
|
||||
return kvm_vm_check_extension(kvm_state, KVM_CAP_X86_SMM);
|
||||
@@ -247,7 +277,7 @@ void kvm_synchronize_all_tsc(void)
|
||||
{
|
||||
CPUState *cpu;
|
||||
|
||||
if (kvm_enabled()) {
|
||||
if (kvm_enabled() && !is_tdx_vm()) {
|
||||
CPU_FOREACH(cpu) {
|
||||
run_on_cpu(cpu, do_kvm_synchronize_tsc, RUN_ON_CPU_NULL);
|
||||
}
|
||||
@@ -490,6 +520,10 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
|
||||
ret |= 1U << KVM_HINTS_REALTIME;
|
||||
}
|
||||
|
||||
if (is_tdx_vm()) {
|
||||
tdx_get_supported_cpuid(function, index, reg, &ret);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -759,6 +793,15 @@ static int kvm_arch_set_tsc_khz(CPUState *cs)
|
||||
int r, cur_freq;
|
||||
bool set_ioctl = false;
|
||||
|
||||
/*
|
||||
* TSC of TD vcpu is immutable, it cannot be set/changed via vcpu scope
|
||||
* VM_SET_TSC_KHZ, but only be initialized via VM scope VM_SET_TSC_KHZ
|
||||
* before ioctl KVM_TDX_INIT_VM in tdx_pre_create_vcpu()
|
||||
*/
|
||||
if (is_tdx_vm()) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!env->tsc_khz) {
|
||||
return 0;
|
||||
}
|
||||
@@ -1655,8 +1698,6 @@ static int hyperv_init_vcpu(X86CPU *cpu)
|
||||
|
||||
static Error *invtsc_mig_blocker;
|
||||
|
||||
#define KVM_MAX_CPUID_ENTRIES 100
|
||||
|
||||
static void kvm_init_xsave(CPUX86State *env)
|
||||
{
|
||||
if (has_xsave2) {
|
||||
@@ -1699,6 +1740,241 @@ static void kvm_init_nested_state(CPUX86State *env)
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t kvm_x86_arch_cpuid(CPUX86State *env, struct kvm_cpuid_entry2 *entries,
|
||||
uint32_t cpuid_i)
|
||||
{
|
||||
uint32_t limit, i, j;
|
||||
uint32_t unused;
|
||||
struct kvm_cpuid_entry2 *c;
|
||||
|
||||
if (cpuid_i > KVM_MAX_CPUID_ENTRIES) {
|
||||
error_report("exceeded cpuid index (%d) for entries[]", cpuid_i);
|
||||
abort();
|
||||
}
|
||||
|
||||
cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused);
|
||||
|
||||
for (i = 0; i <= limit; i++) {
|
||||
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
|
||||
fprintf(stderr, "unsupported level value: 0x%x\n", limit);
|
||||
abort();
|
||||
}
|
||||
c = &entries[cpuid_i++];
|
||||
|
||||
switch (i) {
|
||||
case 2: {
|
||||
/* Keep reading function 2 till all the input is received */
|
||||
int times;
|
||||
|
||||
c->function = i;
|
||||
c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC |
|
||||
KVM_CPUID_FLAG_STATE_READ_NEXT;
|
||||
cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
|
||||
times = c->eax & 0xff;
|
||||
|
||||
for (j = 1; j < times; ++j) {
|
||||
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
|
||||
fprintf(stderr, "cpuid_data is full, no space for "
|
||||
"cpuid(eax:2):eax & 0xf = 0x%x\n", times);
|
||||
abort();
|
||||
}
|
||||
c = &entries[cpuid_i++];
|
||||
c->function = i;
|
||||
c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC;
|
||||
cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 0x1f:
|
||||
if (env->nr_dies < 2) {
|
||||
cpuid_i--;
|
||||
break;
|
||||
}
|
||||
/* fallthrough */
|
||||
case 4:
|
||||
case 0xb:
|
||||
case 0xd:
|
||||
for (j = 0; ; j++) {
|
||||
if (i == 0xd && j == 64) {
|
||||
break;
|
||||
}
|
||||
|
||||
c->function = i;
|
||||
c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
|
||||
c->index = j;
|
||||
cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
|
||||
|
||||
if (i == 4 && c->eax == 0) {
|
||||
break;
|
||||
}
|
||||
if (i == 0xb && !(c->ecx & 0xff00)) {
|
||||
break;
|
||||
}
|
||||
if (i == 0x1f && !(c->ecx & 0xff00)) {
|
||||
break;
|
||||
}
|
||||
if (i == 0xd && c->eax == 0) {
|
||||
continue;
|
||||
}
|
||||
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
|
||||
fprintf(stderr, "cpuid_data is full, no space for "
|
||||
"cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
|
||||
abort();
|
||||
}
|
||||
c = &entries[cpuid_i++];
|
||||
}
|
||||
break;
|
||||
case 0x12:
|
||||
for (j = 0; ; j++) {
|
||||
c->function = i;
|
||||
c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
|
||||
c->index = j;
|
||||
cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
|
||||
|
||||
if (j > 1 && (c->eax & 0xf) != 1) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
|
||||
fprintf(stderr, "cpuid_data is full, no space for "
|
||||
"cpuid(eax:0x12,ecx:0x%x)\n", j);
|
||||
abort();
|
||||
}
|
||||
c = &entries[cpuid_i++];
|
||||
}
|
||||
break;
|
||||
case 0x7:
|
||||
case 0x14:
|
||||
case 0x1d:
|
||||
case 0x1e: {
|
||||
uint32_t times;
|
||||
|
||||
c->function = i;
|
||||
c->index = 0;
|
||||
c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
|
||||
cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
|
||||
times = c->eax;
|
||||
|
||||
for (j = 1; j <= times; ++j) {
|
||||
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
|
||||
fprintf(stderr, "cpuid_data is full, no space for "
|
||||
"cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
|
||||
abort();
|
||||
}
|
||||
c = &entries[cpuid_i++];
|
||||
c->function = i;
|
||||
c->index = j;
|
||||
c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
|
||||
cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
c->function = i;
|
||||
c->flags = 0;
|
||||
cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
|
||||
if (!c->eax && !c->ebx && !c->ecx && !c->edx) {
|
||||
/*
|
||||
* KVM already returns all zeroes if a CPUID entry is missing,
|
||||
* so we can omit it and avoid hitting KVM's 80-entry limit.
|
||||
*/
|
||||
cpuid_i--;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (limit >= 0x0a) {
|
||||
uint32_t eax, edx;
|
||||
|
||||
cpu_x86_cpuid(env, 0x0a, 0, &eax, &unused, &unused, &edx);
|
||||
|
||||
has_architectural_pmu_version = eax & 0xff;
|
||||
if (has_architectural_pmu_version > 0) {
|
||||
num_architectural_pmu_gp_counters = (eax & 0xff00) >> 8;
|
||||
|
||||
/* Shouldn't be more than 32, since that's the number of bits
|
||||
* available in EBX to tell us _which_ counters are available.
|
||||
* Play it safe.
|
||||
*/
|
||||
if (num_architectural_pmu_gp_counters > MAX_GP_COUNTERS) {
|
||||
num_architectural_pmu_gp_counters = MAX_GP_COUNTERS;
|
||||
}
|
||||
|
||||
if (has_architectural_pmu_version > 1) {
|
||||
num_architectural_pmu_fixed_counters = edx & 0x1f;
|
||||
|
||||
if (num_architectural_pmu_fixed_counters > MAX_FIXED_COUNTERS) {
|
||||
num_architectural_pmu_fixed_counters = MAX_FIXED_COUNTERS;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused);
|
||||
|
||||
for (i = 0x80000000; i <= limit; i++) {
|
||||
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
|
||||
fprintf(stderr, "unsupported xlevel value: 0x%x\n", limit);
|
||||
abort();
|
||||
}
|
||||
c = &entries[cpuid_i++];
|
||||
|
||||
switch (i) {
|
||||
case 0x8000001d:
|
||||
/* Query for all AMD cache information leaves */
|
||||
for (j = 0; ; j++) {
|
||||
c->function = i;
|
||||
c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
|
||||
c->index = j;
|
||||
cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
|
||||
|
||||
if (c->eax == 0) {
|
||||
break;
|
||||
}
|
||||
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
|
||||
fprintf(stderr, "cpuid_data is full, no space for "
|
||||
"cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
|
||||
abort();
|
||||
}
|
||||
c = &entries[cpuid_i++];
|
||||
}
|
||||
break;
|
||||
default:
|
||||
c->function = i;
|
||||
c->flags = 0;
|
||||
cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
|
||||
if (!c->eax && !c->ebx && !c->ecx && !c->edx) {
|
||||
/*
|
||||
* KVM already returns all zeroes if a CPUID entry is missing,
|
||||
* so we can omit it and avoid hitting KVM's 80-entry limit.
|
||||
*/
|
||||
cpuid_i--;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Call Centaur's CPUID instructions they are supported. */
|
||||
if (env->cpuid_xlevel2 > 0) {
|
||||
cpu_x86_cpuid(env, 0xC0000000, 0, &limit, &unused, &unused, &unused);
|
||||
|
||||
for (i = 0xC0000000; i <= limit; i++) {
|
||||
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
|
||||
fprintf(stderr, "unsupported xlevel2 value: 0x%x\n", limit);
|
||||
abort();
|
||||
}
|
||||
c = &entries[cpuid_i++];
|
||||
|
||||
c->function = i;
|
||||
c->flags = 0;
|
||||
cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
|
||||
}
|
||||
}
|
||||
|
||||
return cpuid_i;
|
||||
}
|
||||
|
||||
int kvm_arch_init_vcpu(CPUState *cs)
|
||||
{
|
||||
struct {
|
||||
@@ -1715,8 +1991,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
|
||||
|
||||
X86CPU *cpu = X86_CPU(cs);
|
||||
CPUX86State *env = &cpu->env;
|
||||
uint32_t limit, i, j, cpuid_i;
|
||||
uint32_t unused;
|
||||
uint32_t cpuid_i;
|
||||
struct kvm_cpuid_entry2 *c;
|
||||
uint32_t signature[3];
|
||||
int kvm_base = KVM_CPUID_SIGNATURE;
|
||||
@@ -1869,8 +2144,6 @@ int kvm_arch_init_vcpu(CPUState *cs)
|
||||
c->edx = env->features[FEAT_KVM_HINTS];
|
||||
}
|
||||
|
||||
cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused);
|
||||
|
||||
if (cpu->kvm_pv_enforce_cpuid) {
|
||||
r = kvm_vcpu_enable_cap(cs, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, 0, 1);
|
||||
if (r < 0) {
|
||||
@@ -1881,224 +2154,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i <= limit; i++) {
|
||||
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
|
||||
fprintf(stderr, "unsupported level value: 0x%x\n", limit);
|
||||
abort();
|
||||
}
|
||||
c = &cpuid_data.entries[cpuid_i++];
|
||||
|
||||
switch (i) {
|
||||
case 2: {
|
||||
/* Keep reading function 2 till all the input is received */
|
||||
int times;
|
||||
|
||||
c->function = i;
|
||||
c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC |
|
||||
KVM_CPUID_FLAG_STATE_READ_NEXT;
|
||||
cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
|
||||
times = c->eax & 0xff;
|
||||
|
||||
for (j = 1; j < times; ++j) {
|
||||
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
|
||||
fprintf(stderr, "cpuid_data is full, no space for "
|
||||
"cpuid(eax:2):eax & 0xf = 0x%x\n", times);
|
||||
abort();
|
||||
}
|
||||
c = &cpuid_data.entries[cpuid_i++];
|
||||
c->function = i;
|
||||
c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC;
|
||||
cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 0x1f:
|
||||
if (env->nr_dies < 2) {
|
||||
cpuid_i--;
|
||||
break;
|
||||
}
|
||||
/* fallthrough */
|
||||
case 4:
|
||||
case 0xb:
|
||||
case 0xd:
|
||||
for (j = 0; ; j++) {
|
||||
if (i == 0xd && j == 64) {
|
||||
break;
|
||||
}
|
||||
|
||||
c->function = i;
|
||||
c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
|
||||
c->index = j;
|
||||
cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
|
||||
|
||||
if (i == 4 && c->eax == 0) {
|
||||
break;
|
||||
}
|
||||
if (i == 0xb && !(c->ecx & 0xff00)) {
|
||||
break;
|
||||
}
|
||||
if (i == 0x1f && !(c->ecx & 0xff00)) {
|
||||
break;
|
||||
}
|
||||
if (i == 0xd && c->eax == 0) {
|
||||
continue;
|
||||
}
|
||||
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
|
||||
fprintf(stderr, "cpuid_data is full, no space for "
|
||||
"cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
|
||||
abort();
|
||||
}
|
||||
c = &cpuid_data.entries[cpuid_i++];
|
||||
}
|
||||
break;
|
||||
case 0x12:
|
||||
for (j = 0; ; j++) {
|
||||
c->function = i;
|
||||
c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
|
||||
c->index = j;
|
||||
cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
|
||||
|
||||
if (j > 1 && (c->eax & 0xf) != 1) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
|
||||
fprintf(stderr, "cpuid_data is full, no space for "
|
||||
"cpuid(eax:0x12,ecx:0x%x)\n", j);
|
||||
abort();
|
||||
}
|
||||
c = &cpuid_data.entries[cpuid_i++];
|
||||
}
|
||||
break;
|
||||
case 0x7:
|
||||
case 0x14:
|
||||
case 0x1d:
|
||||
case 0x1e: {
|
||||
uint32_t times;
|
||||
|
||||
c->function = i;
|
||||
c->index = 0;
|
||||
c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
|
||||
cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
|
||||
times = c->eax;
|
||||
|
||||
for (j = 1; j <= times; ++j) {
|
||||
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
|
||||
fprintf(stderr, "cpuid_data is full, no space for "
|
||||
"cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
|
||||
abort();
|
||||
}
|
||||
c = &cpuid_data.entries[cpuid_i++];
|
||||
c->function = i;
|
||||
c->index = j;
|
||||
c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
|
||||
cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
c->function = i;
|
||||
c->flags = 0;
|
||||
cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
|
||||
if (!c->eax && !c->ebx && !c->ecx && !c->edx) {
|
||||
/*
|
||||
* KVM already returns all zeroes if a CPUID entry is missing,
|
||||
* so we can omit it and avoid hitting KVM's 80-entry limit.
|
||||
*/
|
||||
cpuid_i--;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (limit >= 0x0a) {
|
||||
uint32_t eax, edx;
|
||||
|
||||
cpu_x86_cpuid(env, 0x0a, 0, &eax, &unused, &unused, &edx);
|
||||
|
||||
has_architectural_pmu_version = eax & 0xff;
|
||||
if (has_architectural_pmu_version > 0) {
|
||||
num_architectural_pmu_gp_counters = (eax & 0xff00) >> 8;
|
||||
|
||||
/* Shouldn't be more than 32, since that's the number of bits
|
||||
* available in EBX to tell us _which_ counters are available.
|
||||
* Play it safe.
|
||||
*/
|
||||
if (num_architectural_pmu_gp_counters > MAX_GP_COUNTERS) {
|
||||
num_architectural_pmu_gp_counters = MAX_GP_COUNTERS;
|
||||
}
|
||||
|
||||
if (has_architectural_pmu_version > 1) {
|
||||
num_architectural_pmu_fixed_counters = edx & 0x1f;
|
||||
|
||||
if (num_architectural_pmu_fixed_counters > MAX_FIXED_COUNTERS) {
|
||||
num_architectural_pmu_fixed_counters = MAX_FIXED_COUNTERS;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused);
|
||||
|
||||
for (i = 0x80000000; i <= limit; i++) {
|
||||
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
|
||||
fprintf(stderr, "unsupported xlevel value: 0x%x\n", limit);
|
||||
abort();
|
||||
}
|
||||
c = &cpuid_data.entries[cpuid_i++];
|
||||
|
||||
switch (i) {
|
||||
case 0x8000001d:
|
||||
/* Query for all AMD cache information leaves */
|
||||
for (j = 0; ; j++) {
|
||||
c->function = i;
|
||||
c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
|
||||
c->index = j;
|
||||
cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
|
||||
|
||||
if (c->eax == 0) {
|
||||
break;
|
||||
}
|
||||
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
|
||||
fprintf(stderr, "cpuid_data is full, no space for "
|
||||
"cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
|
||||
abort();
|
||||
}
|
||||
c = &cpuid_data.entries[cpuid_i++];
|
||||
}
|
||||
break;
|
||||
default:
|
||||
c->function = i;
|
||||
c->flags = 0;
|
||||
cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
|
||||
if (!c->eax && !c->ebx && !c->ecx && !c->edx) {
|
||||
/*
|
||||
* KVM already returns all zeroes if a CPUID entry is missing,
|
||||
* so we can omit it and avoid hitting KVM's 80-entry limit.
|
||||
*/
|
||||
cpuid_i--;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Call Centaur's CPUID instructions they are supported. */
|
||||
if (env->cpuid_xlevel2 > 0) {
|
||||
cpu_x86_cpuid(env, 0xC0000000, 0, &limit, &unused, &unused, &unused);
|
||||
|
||||
for (i = 0xC0000000; i <= limit; i++) {
|
||||
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
|
||||
fprintf(stderr, "unsupported xlevel2 value: 0x%x\n", limit);
|
||||
abort();
|
||||
}
|
||||
c = &cpuid_data.entries[cpuid_i++];
|
||||
|
||||
c->function = i;
|
||||
c->flags = 0;
|
||||
cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
|
||||
}
|
||||
}
|
||||
|
||||
cpuid_i = kvm_x86_arch_cpuid(env, cpuid_data.entries, cpuid_i);
|
||||
cpuid_data.cpuid.nent = cpuid_i;
|
||||
|
||||
if (((env->cpuid_version >> 8)&0xF) >= 6
|
||||
@@ -2224,6 +2280,15 @@ int kvm_arch_init_vcpu(CPUState *cs)
|
||||
return r;
|
||||
}
|
||||
|
||||
int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp)
|
||||
{
|
||||
if (is_tdx_vm()) {
|
||||
return tdx_pre_create_vcpu(cpu, errp);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_arch_destroy_vcpu(CPUState *cs)
|
||||
{
|
||||
X86CPU *cpu = X86_CPU(cs);
|
||||
@@ -2520,21 +2585,14 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
|
||||
Error *local_err = NULL;
|
||||
|
||||
/*
|
||||
* Initialize SEV context, if required
|
||||
*
|
||||
* If no memory encryption is requested (ms->cgs == NULL) this is
|
||||
* a no-op.
|
||||
*
|
||||
* It's also a no-op if a non-SEV confidential guest support
|
||||
* mechanism is selected. SEV is the only mechanism available to
|
||||
* select on x86 at present, so this doesn't arise, but if new
|
||||
* mechanisms are supported in future (e.g. TDX), they'll need
|
||||
* their own initialization either here or elsewhere.
|
||||
* Initialize confidential guest (SEV/TDX) context, if required
|
||||
*/
|
||||
ret = sev_kvm_init(ms->cgs, &local_err);
|
||||
if (ret < 0) {
|
||||
error_report_err(local_err);
|
||||
return ret;
|
||||
if (ms->cgs) {
|
||||
ret = confidential_guest_kvm_init(ms->cgs, &local_err);
|
||||
if (ret < 0) {
|
||||
error_report_err(local_err);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
has_xcrs = kvm_check_extension(s, KVM_CAP_XCRS);
|
||||
@@ -2994,6 +3052,11 @@ void kvm_put_apicbase(X86CPU *cpu, uint64_t value)
|
||||
{
|
||||
int ret;
|
||||
|
||||
/* TODO: Allow accessing guest state for debug TDs. */
|
||||
if (is_tdx_vm()) {
|
||||
return;
|
||||
}
|
||||
|
||||
ret = kvm_put_one_msr(cpu, MSR_IA32_APICBASE, value);
|
||||
assert(ret == 1);
|
||||
}
|
||||
@@ -3212,32 +3275,34 @@ static void kvm_init_msrs(X86CPU *cpu)
|
||||
CPUX86State *env = &cpu->env;
|
||||
|
||||
kvm_msr_buf_reset(cpu);
|
||||
if (has_msr_arch_capabs) {
|
||||
kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES,
|
||||
env->features[FEAT_ARCH_CAPABILITIES]);
|
||||
}
|
||||
|
||||
if (has_msr_core_capabs) {
|
||||
kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY,
|
||||
env->features[FEAT_CORE_CAPABILITY]);
|
||||
}
|
||||
if (!is_tdx_vm()) {
|
||||
if (has_msr_arch_capabs) {
|
||||
kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES,
|
||||
env->features[FEAT_ARCH_CAPABILITIES]);
|
||||
}
|
||||
|
||||
if (has_msr_perf_capabs && cpu->enable_pmu) {
|
||||
kvm_msr_entry_add_perf(cpu, env->features);
|
||||
if (has_msr_core_capabs) {
|
||||
kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY,
|
||||
env->features[FEAT_CORE_CAPABILITY]);
|
||||
}
|
||||
|
||||
if (has_msr_perf_capabs && cpu->enable_pmu) {
|
||||
kvm_msr_entry_add_perf(cpu, env->features);
|
||||
}
|
||||
|
||||
/*
|
||||
* Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but
|
||||
* all kernels with MSR features should have them.
|
||||
*/
|
||||
if (kvm_feature_msrs && cpu_has_vmx(env)) {
|
||||
kvm_msr_entry_add_vmx(cpu, env->features);
|
||||
}
|
||||
}
|
||||
|
||||
if (has_msr_ucode_rev) {
|
||||
kvm_msr_entry_add(cpu, MSR_IA32_UCODE_REV, cpu->ucode_rev);
|
||||
}
|
||||
|
||||
/*
|
||||
* Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but
|
||||
* all kernels with MSR features should have them.
|
||||
*/
|
||||
if (kvm_feature_msrs && cpu_has_vmx(env)) {
|
||||
kvm_msr_entry_add_vmx(cpu, env->features);
|
||||
}
|
||||
|
||||
assert(kvm_buf_set_msrs(cpu) == 0);
|
||||
}
|
||||
|
||||
@@ -4555,6 +4620,11 @@ int kvm_arch_put_registers(CPUState *cpu, int level)
|
||||
|
||||
assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
|
||||
|
||||
/* TODO: Allow accessing guest state for debug TDs. */
|
||||
if (is_tdx_vm()) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Put MSR_IA32_FEATURE_CONTROL first, this ensures the VM gets out of VMX
|
||||
* root operation upon vCPU reset. kvm_put_msr_feature_control() should also
|
||||
@@ -4655,6 +4725,12 @@ int kvm_arch_get_registers(CPUState *cs)
|
||||
if (ret < 0) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* TODO: Allow accessing guest state for debug TDs. */
|
||||
if (is_tdx_vm()) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
ret = kvm_getput_regs(cpu, 0);
|
||||
if (ret < 0) {
|
||||
goto out;
|
||||
@@ -5355,6 +5431,14 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
|
||||
ret = kvm_xen_handle_exit(cpu, &run->xen);
|
||||
break;
|
||||
#endif
|
||||
case KVM_EXIT_TDX:
|
||||
if (!is_tdx_vm()) {
|
||||
error_report("KVM: get KVM_EXIT_TDX for a non-TDX VM.");
|
||||
ret = -1;
|
||||
break;
|
||||
}
|
||||
ret = tdx_handle_exit(cpu, &run->tdx);
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
|
||||
ret = -1;
|
||||
@@ -5607,7 +5691,7 @@ bool kvm_has_waitpkg(void)
|
||||
|
||||
bool kvm_arch_cpu_check_are_resettable(void)
|
||||
{
|
||||
return !sev_es_enabled();
|
||||
return !sev_es_enabled() && !is_tdx_vm();
|
||||
}
|
||||
|
||||
#define ARCH_REQ_XCOMP_GUEST_PERM 0x1025
|
||||
|
||||
@@ -13,6 +13,8 @@
|
||||
|
||||
#include "sysemu/kvm.h"
|
||||
|
||||
#define KVM_MAX_CPUID_ENTRIES 100
|
||||
|
||||
#ifdef CONFIG_KVM
|
||||
|
||||
#define kvm_pit_in_kernel() \
|
||||
@@ -22,6 +24,9 @@
|
||||
#define kvm_ioapic_in_kernel() \
|
||||
(kvm_irqchip_in_kernel() && !kvm_irqchip_is_split())
|
||||
|
||||
uint32_t kvm_x86_arch_cpuid(CPUX86State *env, struct kvm_cpuid_entry2 *entries,
|
||||
uint32_t cpuid_i);
|
||||
|
||||
#else
|
||||
|
||||
#define kvm_pit_in_kernel() 0
|
||||
@@ -37,6 +42,7 @@ bool kvm_hv_vpindex_settable(void);
|
||||
bool kvm_enable_sgx_provisioning(KVMState *s);
|
||||
bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp);
|
||||
|
||||
int kvm_get_vm_type(MachineState *ms, const char *vm_type);
|
||||
void kvm_arch_reset_vcpu(X86CPU *cs);
|
||||
void kvm_arch_after_reset_vcpu(X86CPU *cpu);
|
||||
void kvm_arch_do_init_vcpu(X86CPU *cs);
|
||||
|
||||
@@ -7,7 +7,7 @@ i386_kvm_ss.add(files(
|
||||
|
||||
i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files('xen-emu.c'))
|
||||
|
||||
i386_kvm_ss.add(when: 'CONFIG_SEV', if_false: files('sev-stub.c'))
|
||||
i386_kvm_ss.add(when: 'CONFIG_TDX', if_true: files('tdx.c', 'tdx-quote-generator.c'), if_false: files('tdx-stub.c'))
|
||||
|
||||
i386_system_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c'), if_false: files('hyperv-stub.c'))
|
||||
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
/*
|
||||
* QEMU SEV stub
|
||||
*
|
||||
* Copyright Advanced Micro Devices 2018
|
||||
*
|
||||
* Authors:
|
||||
* Brijesh Singh <brijesh.singh@amd.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2 or later.
|
||||
* See the COPYING file in the top-level directory.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "sev.h"
|
||||
|
||||
int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
{
|
||||
/* If we get here, cgs must be some non-SEV thing */
|
||||
return 0;
|
||||
}
|
||||
170
target/i386/kvm/tdx-quote-generator.c
Normal file
170
target/i386/kvm/tdx-quote-generator.c
Normal file
@@ -0,0 +1,170 @@
|
||||
/*
|
||||
* QEMU TDX support
|
||||
*
|
||||
* Copyright Intel
|
||||
*
|
||||
* Author:
|
||||
* Xiaoyao Li <xiaoyao.li@intel.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2 or later.
|
||||
* See the COPYING file in the top-level directory
|
||||
*
|
||||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "qemu/error-report.h"
|
||||
#include "qapi/error.h"
|
||||
#include "qapi/qapi-visit-sockets.h"
|
||||
|
||||
#include "tdx-quote-generator.h"
|
||||
|
||||
typedef struct TdxQuoteGeneratorClass {
|
||||
DeviceClass parent_class;
|
||||
} TdxQuoteGeneratorClass;
|
||||
|
||||
OBJECT_DEFINE_TYPE(TdxQuoteGenerator, tdx_quote_generator, TDX_QUOTE_GENERATOR, OBJECT)
|
||||
|
||||
static void tdx_quote_generator_finalize(Object *obj)
|
||||
{
|
||||
}
|
||||
|
||||
static void tdx_quote_generator_class_init(ObjectClass *oc, void *data)
|
||||
{
|
||||
}
|
||||
|
||||
static void tdx_quote_generator_init(Object *obj)
|
||||
{
|
||||
}
|
||||
|
||||
static void tdx_generate_quote_cleanup(struct tdx_generate_quote_task *task)
|
||||
{
|
||||
timer_del(&task->timer);
|
||||
|
||||
g_source_remove(task->watch);
|
||||
qio_channel_close(QIO_CHANNEL(task->sioc), NULL);
|
||||
object_unref(OBJECT(task->sioc));
|
||||
|
||||
/* Maintain the number of in-flight requests. */
|
||||
qemu_mutex_lock(&task->quote_gen->lock);
|
||||
task->quote_gen->num--;
|
||||
qemu_mutex_unlock(&task->quote_gen->lock);
|
||||
|
||||
task->completion(task);
|
||||
}
|
||||
|
||||
static gboolean tdx_get_quote_read(QIOChannel *ioc, GIOCondition condition,
|
||||
gpointer opaque)
|
||||
{
|
||||
struct tdx_generate_quote_task *task = opaque;
|
||||
Error *err = NULL;
|
||||
int ret;
|
||||
|
||||
ret = qio_channel_read(ioc, task->receive_buf + task->receive_buf_received,
|
||||
task->payload_len - task->receive_buf_received, &err);
|
||||
if (ret < 0) {
|
||||
if (ret == QIO_CHANNEL_ERR_BLOCK) {
|
||||
return G_SOURCE_CONTINUE;
|
||||
} else {
|
||||
error_report_err(err);
|
||||
task->status_code = TDX_VP_GET_QUOTE_ERROR;
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
|
||||
task->receive_buf_received += ret;
|
||||
if (ret == 0 || task->receive_buf_received == task->payload_len) {
|
||||
task->status_code = TDX_VP_GET_QUOTE_SUCCESS;
|
||||
goto end;
|
||||
}
|
||||
|
||||
return G_SOURCE_CONTINUE;
|
||||
|
||||
end:
|
||||
tdx_generate_quote_cleanup(task);
|
||||
return G_SOURCE_REMOVE;
|
||||
}
|
||||
|
||||
static gboolean tdx_send_report(QIOChannel *ioc, GIOCondition condition,
|
||||
gpointer opaque)
|
||||
{
|
||||
struct tdx_generate_quote_task *task = opaque;
|
||||
Error *err = NULL;
|
||||
int ret;
|
||||
|
||||
ret = qio_channel_write(ioc, task->send_data + task->send_data_sent,
|
||||
task->send_data_size - task->send_data_sent, &err);
|
||||
if (ret < 0) {
|
||||
if (ret == QIO_CHANNEL_ERR_BLOCK) {
|
||||
ret = 0;
|
||||
} else {
|
||||
error_report_err(err);
|
||||
task->status_code = TDX_VP_GET_QUOTE_ERROR;
|
||||
tdx_generate_quote_cleanup(task);
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
task->send_data_sent += ret;
|
||||
|
||||
if (task->send_data_sent == task->send_data_size) {
|
||||
task->watch = qio_channel_add_watch(QIO_CHANNEL(task->sioc), G_IO_IN,
|
||||
tdx_get_quote_read, task, NULL);
|
||||
goto end;
|
||||
}
|
||||
|
||||
return G_SOURCE_CONTINUE;
|
||||
|
||||
end:
|
||||
return G_SOURCE_REMOVE;
|
||||
}
|
||||
|
||||
static void tdx_quote_generator_connected(QIOTask *qio_task, gpointer opaque)
|
||||
{
|
||||
struct tdx_generate_quote_task *task = opaque;
|
||||
Error *err = NULL;
|
||||
int ret;
|
||||
|
||||
ret = qio_task_propagate_error(qio_task, &err);
|
||||
if (ret) {
|
||||
error_report_err(err);
|
||||
task->status_code = TDX_VP_GET_QUOTE_QGS_UNAVAILABLE;
|
||||
tdx_generate_quote_cleanup(task);
|
||||
return;
|
||||
}
|
||||
|
||||
task->watch = qio_channel_add_watch(QIO_CHANNEL(task->sioc), G_IO_OUT,
|
||||
tdx_send_report, task, NULL);
|
||||
}
|
||||
|
||||
#define TRANSACTION_TIMEOUT 30000
|
||||
|
||||
static void getquote_expired(void *opaque)
|
||||
{
|
||||
struct tdx_generate_quote_task *task = opaque;
|
||||
|
||||
task->status_code = TDX_VP_GET_QUOTE_ERROR;
|
||||
tdx_generate_quote_cleanup(task);
|
||||
}
|
||||
|
||||
static void setup_get_quote_timer(struct tdx_generate_quote_task *task)
|
||||
{
|
||||
int64_t time;
|
||||
|
||||
timer_init_ms(&task->timer, QEMU_CLOCK_VIRTUAL, getquote_expired, task);
|
||||
time = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
|
||||
timer_mod(&task->timer, time + TRANSACTION_TIMEOUT);
|
||||
}
|
||||
|
||||
void tdx_generate_quote(struct tdx_generate_quote_task *task)
|
||||
{
|
||||
struct TdxQuoteGenerator *quote_gen = task->quote_gen;
|
||||
QIOChannelSocket *sioc;
|
||||
|
||||
sioc = qio_channel_socket_new();
|
||||
task->sioc = sioc;
|
||||
|
||||
setup_get_quote_timer(task);
|
||||
|
||||
qio_channel_socket_connect_async(sioc, quote_gen->socket,
|
||||
tdx_quote_generator_connected, task,
|
||||
NULL, NULL);
|
||||
}
|
||||
95
target/i386/kvm/tdx-quote-generator.h
Normal file
95
target/i386/kvm/tdx-quote-generator.h
Normal file
@@ -0,0 +1,95 @@
|
||||
#ifndef QEMU_I386_TDX_QUOTE_GENERATOR_H
|
||||
#define QEMU_I386_TDX_QUOTE_GENERATOR_H
|
||||
|
||||
#include "qom/object_interfaces.h"
|
||||
#include "io/channel-socket.h"
|
||||
#include "exec/hwaddr.h"
|
||||
|
||||
/* tdx quote generation */
|
||||
struct TdxQuoteGenerator {
|
||||
Object parent_obj;
|
||||
|
||||
int num;
|
||||
SocketAddress *socket;
|
||||
|
||||
QemuMutex lock;
|
||||
};
|
||||
|
||||
#define TYPE_TDX_QUOTE_GENERATOR "tdx-quote-generator"
|
||||
|
||||
OBJECT_DECLARE_SIMPLE_TYPE(TdxQuoteGenerator, TDX_QUOTE_GENERATOR)
|
||||
|
||||
|
||||
#define TDX_GET_QUOTE_STRUCTURE_VERSION 1ULL
|
||||
|
||||
#define TDX_VP_GET_QUOTE_SUCCESS 0ULL
|
||||
#define TDX_VP_GET_QUOTE_IN_FLIGHT (-1ULL)
|
||||
#define TDX_VP_GET_QUOTE_ERROR 0x8000000000000000ULL
|
||||
#define TDX_VP_GET_QUOTE_QGS_UNAVAILABLE 0x8000000000000001ULL
|
||||
|
||||
/* Limit to avoid resource starvation. */
|
||||
#define TDX_GET_QUOTE_MAX_BUF_LEN (128 * 1024)
|
||||
#define TDX_MAX_GET_QUOTE_REQUEST 16
|
||||
|
||||
#define TDX_GET_QUOTE_HDR_SIZE 24
|
||||
|
||||
/* Format of pages shared with guest. */
|
||||
struct tdx_get_quote_header {
|
||||
/* Format version: must be 1 in little endian. */
|
||||
uint64_t structure_version;
|
||||
|
||||
/*
|
||||
* GetQuote status code in little endian:
|
||||
* Guest must set error_code to 0 to avoid information leak.
|
||||
* Qemu sets this before interrupting guest.
|
||||
*/
|
||||
uint64_t error_code;
|
||||
|
||||
/*
|
||||
* in-message size in little endian: The message will follow this header.
|
||||
* The in-message will be send to QGS.
|
||||
*/
|
||||
uint32_t in_len;
|
||||
|
||||
/*
|
||||
* out-message size in little endian:
|
||||
* On request, out_len must be zero to avoid information leak.
|
||||
* On return, message size from QGS. Qemu overwrites this field.
|
||||
* The message will follows this header. The in-message is overwritten.
|
||||
*/
|
||||
uint32_t out_len;
|
||||
|
||||
/*
|
||||
* Message buffer follows.
|
||||
* Guest sets message that will be send to QGS. If out_len > in_len, guest
|
||||
* should zero remaining buffer to avoid information leak.
|
||||
* Qemu overwrites this buffer with a message returned from QGS.
|
||||
*/
|
||||
};
|
||||
|
||||
struct tdx_generate_quote_task {
|
||||
hwaddr buf_gpa;
|
||||
hwaddr payload_gpa;
|
||||
uint64_t payload_len;
|
||||
|
||||
char *send_data;
|
||||
uint64_t send_data_size;
|
||||
uint64_t send_data_sent;
|
||||
|
||||
char *receive_buf;
|
||||
uint64_t receive_buf_received;
|
||||
|
||||
uint64_t status_code;
|
||||
struct tdx_get_quote_header hdr;
|
||||
|
||||
QIOChannelSocket *sioc;
|
||||
guint watch;
|
||||
QEMUTimer timer;
|
||||
struct TdxQuoteGenerator *quote_gen;
|
||||
|
||||
void (*completion)(struct tdx_generate_quote_task *task);
|
||||
};
|
||||
|
||||
void tdx_generate_quote(struct tdx_generate_quote_task *task);
|
||||
|
||||
#endif /* QEMU_I386_TDX_QUOTE_GENERATOR_H */
|
||||
18
target/i386/kvm/tdx-stub.c
Normal file
18
target/i386/kvm/tdx-stub.c
Normal file
@@ -0,0 +1,18 @@
|
||||
#include "qemu/osdep.h"
|
||||
|
||||
#include "tdx.h"
|
||||
|
||||
int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
int tdx_parse_tdvf(void *flash_ptr, int size)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
int tdx_handle_exit(X86CPU *cpu, struct kvm_tdx_exit *tdx_exit)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
1371
target/i386/kvm/tdx.c
Normal file
1371
target/i386/kvm/tdx.c
Normal file
File diff suppressed because it is too large
Load Diff
81
target/i386/kvm/tdx.h
Normal file
81
target/i386/kvm/tdx.h
Normal file
@@ -0,0 +1,81 @@
|
||||
#ifndef QEMU_I386_TDX_H
|
||||
#define QEMU_I386_TDX_H
|
||||
|
||||
#ifndef CONFIG_USER_ONLY
|
||||
#include CONFIG_DEVICES /* CONFIG_TDX */
|
||||
#endif
|
||||
|
||||
#include "exec/confidential-guest-support.h"
|
||||
#include "hw/i386/tdvf.h"
|
||||
#include "sysemu/kvm.h"
|
||||
|
||||
#include "tdx-quote-generator.h"
|
||||
|
||||
#define TYPE_TDX_GUEST "tdx-guest"
|
||||
#define TDX_GUEST(obj) OBJECT_CHECK(TdxGuest, (obj), TYPE_TDX_GUEST)
|
||||
|
||||
typedef struct TdxGuestClass {
|
||||
ConfidentialGuestSupportClass parent_class;
|
||||
} TdxGuestClass;
|
||||
|
||||
#define TDG_VP_VMCALL_MAP_GPA 0x10001ULL
|
||||
#define TDG_VP_VMCALL_GET_QUOTE 0x10002ULL
|
||||
#define TDG_VP_VMCALL_REPORT_FATAL_ERROR 0x10003ULL
|
||||
#define TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT 0x10004ULL
|
||||
|
||||
#define TDG_VP_VMCALL_SUCCESS 0x0000000000000000ULL
|
||||
#define TDG_VP_VMCALL_RETRY 0x0000000000000001ULL
|
||||
#define TDG_VP_VMCALL_INVALID_OPERAND 0x8000000000000000ULL
|
||||
#define TDG_VP_VMCALL_GPA_INUSE 0x8000000000000001ULL
|
||||
#define TDG_VP_VMCALL_ALIGN_ERROR 0x8000000000000002ULL
|
||||
|
||||
enum TdxRamType{
|
||||
TDX_RAM_UNACCEPTED,
|
||||
TDX_RAM_ADDED,
|
||||
};
|
||||
|
||||
typedef struct TdxRamEntry {
|
||||
uint64_t address;
|
||||
uint64_t length;
|
||||
enum TdxRamType type;
|
||||
} TdxRamEntry;
|
||||
|
||||
typedef struct TdxGuest {
|
||||
ConfidentialGuestSupport parent_obj;
|
||||
|
||||
QemuMutex lock;
|
||||
|
||||
bool initialized;
|
||||
uint64_t attributes; /* TD attributes */
|
||||
char *mrconfigid; /* base64 encoded sha348 digest */
|
||||
char *mrowner; /* base64 encoded sha348 digest */
|
||||
char *mrownerconfig; /* base64 encoded sha348 digest */
|
||||
|
||||
MemoryRegion *tdvf_mr;
|
||||
TdxFirmware tdvf;
|
||||
|
||||
uint32_t nr_ram_entries;
|
||||
TdxRamEntry *ram_entries;
|
||||
|
||||
/* runtime state */
|
||||
uint32_t event_notify_vector;
|
||||
uint32_t event_notify_apicid;
|
||||
|
||||
/* GetQuote */
|
||||
TdxQuoteGenerator *quote_generator;
|
||||
} TdxGuest;
|
||||
|
||||
#ifdef CONFIG_TDX
|
||||
bool is_tdx_vm(void);
|
||||
#else
|
||||
#define is_tdx_vm() 0
|
||||
#endif /* CONFIG_TDX */
|
||||
|
||||
void tdx_get_supported_cpuid(uint32_t function, uint32_t index, int reg,
|
||||
uint32_t *ret);
|
||||
int tdx_pre_create_vcpu(CPUState *cpu, Error **errp);
|
||||
void tdx_set_tdvf_region(MemoryRegion *tdvf_mr);
|
||||
int tdx_parse_tdvf(void *flash_ptr, int size);
|
||||
int tdx_handle_exit(X86CPU *cpu, struct kvm_tdx_exit *tdx_exit);
|
||||
|
||||
#endif /* QEMU_I386_TDX_H */
|
||||
@@ -353,63 +353,6 @@ static void sev_guest_set_kernel_hashes(Object *obj, bool value, Error **errp)
|
||||
sev->kernel_hashes = value;
|
||||
}
|
||||
|
||||
static void
|
||||
sev_guest_class_init(ObjectClass *oc, void *data)
|
||||
{
|
||||
object_class_property_add_str(oc, "sev-device",
|
||||
sev_guest_get_sev_device,
|
||||
sev_guest_set_sev_device);
|
||||
object_class_property_set_description(oc, "sev-device",
|
||||
"SEV device to use");
|
||||
object_class_property_add_str(oc, "dh-cert-file",
|
||||
sev_guest_get_dh_cert_file,
|
||||
sev_guest_set_dh_cert_file);
|
||||
object_class_property_set_description(oc, "dh-cert-file",
|
||||
"guest owners DH certificate (encoded with base64)");
|
||||
object_class_property_add_str(oc, "session-file",
|
||||
sev_guest_get_session_file,
|
||||
sev_guest_set_session_file);
|
||||
object_class_property_set_description(oc, "session-file",
|
||||
"guest owners session parameters (encoded with base64)");
|
||||
object_class_property_add_bool(oc, "kernel-hashes",
|
||||
sev_guest_get_kernel_hashes,
|
||||
sev_guest_set_kernel_hashes);
|
||||
object_class_property_set_description(oc, "kernel-hashes",
|
||||
"add kernel hashes to guest firmware for measured Linux boot");
|
||||
}
|
||||
|
||||
static void
|
||||
sev_guest_instance_init(Object *obj)
|
||||
{
|
||||
SevGuestState *sev = SEV_GUEST(obj);
|
||||
|
||||
sev->sev_device = g_strdup(DEFAULT_SEV_DEVICE);
|
||||
sev->policy = DEFAULT_GUEST_POLICY;
|
||||
object_property_add_uint32_ptr(obj, "policy", &sev->policy,
|
||||
OBJ_PROP_FLAG_READWRITE);
|
||||
object_property_add_uint32_ptr(obj, "handle", &sev->handle,
|
||||
OBJ_PROP_FLAG_READWRITE);
|
||||
object_property_add_uint32_ptr(obj, "cbitpos", &sev->cbitpos,
|
||||
OBJ_PROP_FLAG_READWRITE);
|
||||
object_property_add_uint32_ptr(obj, "reduced-phys-bits",
|
||||
&sev->reduced_phys_bits,
|
||||
OBJ_PROP_FLAG_READWRITE);
|
||||
}
|
||||
|
||||
/* sev guest info */
|
||||
static const TypeInfo sev_guest_info = {
|
||||
.parent = TYPE_CONFIDENTIAL_GUEST_SUPPORT,
|
||||
.name = TYPE_SEV_GUEST,
|
||||
.instance_size = sizeof(SevGuestState),
|
||||
.instance_finalize = sev_guest_finalize,
|
||||
.class_init = sev_guest_class_init,
|
||||
.instance_init = sev_guest_instance_init,
|
||||
.interfaces = (InterfaceInfo[]) {
|
||||
{ TYPE_USER_CREATABLE },
|
||||
{ }
|
||||
}
|
||||
};
|
||||
|
||||
bool
|
||||
sev_enabled(void)
|
||||
{
|
||||
@@ -906,7 +849,7 @@ sev_vm_state_change(void *opaque, bool running, RunState state)
|
||||
}
|
||||
}
|
||||
|
||||
int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
{
|
||||
SevGuestState *sev
|
||||
= (SevGuestState *)object_dynamic_cast(OBJECT(cgs), TYPE_SEV_GUEST);
|
||||
@@ -1383,6 +1326,67 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void
|
||||
sev_guest_class_init(ObjectClass *oc, void *data)
|
||||
{
|
||||
ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc);
|
||||
|
||||
klass->kvm_init = sev_kvm_init;
|
||||
|
||||
object_class_property_add_str(oc, "sev-device",
|
||||
sev_guest_get_sev_device,
|
||||
sev_guest_set_sev_device);
|
||||
object_class_property_set_description(oc, "sev-device",
|
||||
"SEV device to use");
|
||||
object_class_property_add_str(oc, "dh-cert-file",
|
||||
sev_guest_get_dh_cert_file,
|
||||
sev_guest_set_dh_cert_file);
|
||||
object_class_property_set_description(oc, "dh-cert-file",
|
||||
"guest owners DH certificate (encoded with base64)");
|
||||
object_class_property_add_str(oc, "session-file",
|
||||
sev_guest_get_session_file,
|
||||
sev_guest_set_session_file);
|
||||
object_class_property_set_description(oc, "session-file",
|
||||
"guest owners session parameters (encoded with base64)");
|
||||
object_class_property_add_bool(oc, "kernel-hashes",
|
||||
sev_guest_get_kernel_hashes,
|
||||
sev_guest_set_kernel_hashes);
|
||||
object_class_property_set_description(oc, "kernel-hashes",
|
||||
"add kernel hashes to guest firmware for measured Linux boot");
|
||||
}
|
||||
|
||||
static void
|
||||
sev_guest_instance_init(Object *obj)
|
||||
{
|
||||
SevGuestState *sev = SEV_GUEST(obj);
|
||||
|
||||
sev->sev_device = g_strdup(DEFAULT_SEV_DEVICE);
|
||||
sev->policy = DEFAULT_GUEST_POLICY;
|
||||
object_property_add_uint32_ptr(obj, "policy", &sev->policy,
|
||||
OBJ_PROP_FLAG_READWRITE);
|
||||
object_property_add_uint32_ptr(obj, "handle", &sev->handle,
|
||||
OBJ_PROP_FLAG_READWRITE);
|
||||
object_property_add_uint32_ptr(obj, "cbitpos", &sev->cbitpos,
|
||||
OBJ_PROP_FLAG_READWRITE);
|
||||
object_property_add_uint32_ptr(obj, "reduced-phys-bits",
|
||||
&sev->reduced_phys_bits,
|
||||
OBJ_PROP_FLAG_READWRITE);
|
||||
}
|
||||
|
||||
/* sev guest info */
|
||||
static const TypeInfo sev_guest_info = {
|
||||
.parent = TYPE_CONFIDENTIAL_GUEST_SUPPORT,
|
||||
.name = TYPE_SEV_GUEST,
|
||||
.instance_size = sizeof(SevGuestState),
|
||||
.instance_finalize = sev_guest_finalize,
|
||||
.class_init = sev_guest_class_init,
|
||||
.instance_init = sev_guest_instance_init,
|
||||
.interfaces = (InterfaceInfo[]) {
|
||||
{ TYPE_USER_CREATABLE },
|
||||
{ }
|
||||
}
|
||||
};
|
||||
|
||||
static void
|
||||
sev_register_types(void)
|
||||
{
|
||||
|
||||
@@ -57,6 +57,4 @@ int sev_inject_launch_secret(const char *hdr, const char *secret,
|
||||
int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size);
|
||||
void sev_es_set_reset_vector(CPUState *cpu);
|
||||
|
||||
int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -340,6 +340,11 @@ int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!kvm_enabled()) {
|
||||
error_setg(errp, "Protected Virtualization requires KVM");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!s390_has_feat(S390_FEAT_UNPACK)) {
|
||||
error_setg(errp,
|
||||
"CPU model does not support Protected Virtualization");
|
||||
@@ -364,6 +369,9 @@ OBJECT_DEFINE_TYPE_WITH_INTERFACES(S390PVGuest,
|
||||
|
||||
static void s390_pv_guest_class_init(ObjectClass *oc, void *data)
|
||||
{
|
||||
ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc);
|
||||
|
||||
klass->kvm_init = s390_pv_kvm_init;
|
||||
}
|
||||
|
||||
static void s390_pv_guest_init(Object *obj)
|
||||
|
||||
@@ -80,18 +80,4 @@ static inline int kvm_s390_dump_mem_state(uint64_t addr, size_t len,
|
||||
static inline int kvm_s390_dump_completion_data(void *buff) { return 0; }
|
||||
#endif /* CONFIG_KVM */
|
||||
|
||||
int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp);
|
||||
static inline int s390_pv_init(ConfidentialGuestSupport *cgs, Error **errp)
|
||||
{
|
||||
if (!cgs) {
|
||||
return 0;
|
||||
}
|
||||
if (kvm_enabled()) {
|
||||
return s390_pv_kvm_init(cgs, errp);
|
||||
}
|
||||
|
||||
error_setg(errp, "Protected Virtualization requires KVM");
|
||||
return -1;
|
||||
}
|
||||
|
||||
#endif /* HW_S390_PV_H */
|
||||
|
||||
Reference in New Issue
Block a user