Compare commits
299 Commits
qdev-array
...
v7.2-sle15
Author | SHA1 | Date | |
---|---|---|---|
6085814609 | |||
8862a75cd6 | |||
|
cf81a7f5d6 | ||
|
8021ecb26f | ||
|
5719fe12e6 | ||
6c9d5de6f2 | |||
|
a2dad04cfd | ||
|
a7b2246db1 | ||
|
3f970d925b | ||
|
bef5f455a1 | ||
0f32d7ee85 | |||
|
64151aab55 | ||
|
92618bab8a | ||
|
e4102870ef | ||
|
15426b13d1 | ||
cdae908c02 | |||
|
c49330b586 | ||
|
4ad59ecbd1 | ||
dad3f53fda | |||
|
ce4a21692d | ||
|
3825434f12 | ||
|
d1b2091899 | ||
|
bae107c3fc | ||
|
54669a99cd | ||
|
b8f20f22b3 | ||
|
b6fad87c88 | ||
|
a26e18306b | ||
|
b8f900de8a | ||
|
aa17838f4b | ||
|
b3889651c0 | ||
|
4416a893bf | ||
|
d57bc5e264 | ||
addf977986 | |||
0f0a481d7f | |||
fcffc18e6b | |||
|
177b3f0f25 | ||
|
505ef4fc71 | ||
|
1a7e5e4ac0 | ||
|
79b83c46a3 | ||
|
a9e07364f4 | ||
|
3bd558cca6 | ||
|
dcb2c79d00 | ||
|
e44d43d8c4 | ||
|
6fbf65fc33 | ||
|
1227330e7e | ||
|
675b07c6f9 | ||
|
20a1248b75 | ||
|
2b7c2e66dd | ||
|
6e53fc839c | ||
|
1ea55257a0 | ||
|
a9e2064e49 | ||
|
6e8132e3ab | ||
|
cf015bf13b | ||
|
3172e362ec | ||
|
c159594a2f | ||
|
70b6bc1f21 | ||
|
e2fd42edbc | ||
|
5da2d79b71 | ||
|
c68f6c95ac | ||
|
391c541664 | ||
|
680b9f336b | ||
|
f77734f669 | ||
|
b5cafa9dc4 | ||
|
cffa142905 | ||
|
b9e71c8209 | ||
|
096e51bfcd | ||
|
a43b17b361 | ||
|
a038e1f049 | ||
|
06270a5863 | ||
|
8f83837129 | ||
|
b55b95c2a5 | ||
|
b2dae7c30e | ||
|
c992aacda0 | ||
|
3b4ec1ffef | ||
|
64c33b7242 | ||
|
2da68cbca4 | ||
|
418db0865f | ||
|
618a5ac329 | ||
|
970b4e9fea | ||
|
349f7b8820 | ||
|
d4fa41750f | ||
|
e2c09409f4 | ||
|
b15032c507 | ||
|
3c6dafc8c2 | ||
|
2339c7a6dc | ||
|
290e4dbb60 | ||
|
f9fd280bf3 | ||
|
1906c6bb08 | ||
|
36cb60436e | ||
|
d7851975e9 | ||
|
4e168d7c15 | ||
|
fb35cb6e5a | ||
|
6cb2305e5a | ||
|
069c561bf2 | ||
|
affb22d740 | ||
|
e848b88b59 | ||
|
f1050c8815 | ||
|
1331bb66ee | ||
|
225e6bd582 | ||
|
c3ada4bc28 | ||
|
e6d1beea29 | ||
|
38dafdee72 | ||
|
ea93caa1df | ||
|
9994869161 | ||
|
bf1be8526e | ||
|
1d88724aef | ||
|
1ecc52f24e | ||
|
43994affd3 | ||
|
0ab7f737e4 | ||
|
b4fba14d06 | ||
|
a081f00a2d | ||
|
2156f0dc89 | ||
|
b747149380 | ||
|
47ec3407be | ||
|
f6b692a9ed | ||
|
cbccd84d4a | ||
|
bc0aac1c9c | ||
|
4fff1c5f44 | ||
|
18186f8ef4 | ||
|
f46e62b2ce | ||
|
fb957e38ed | ||
|
b581c14b8f | ||
|
7e81998399 | ||
|
d2eaa48ab8 | ||
|
67fccc4ea7 | ||
|
b1afb83cc4 | ||
|
09716837e6 | ||
|
db86d2719e | ||
|
4ffb93d4c0 | ||
|
bc48e23be6 | ||
|
e2b3d21370 | ||
|
acf8c03629 | ||
|
3dd2583704 | ||
|
801205587c | ||
|
2fe361465d | ||
|
d5c56c0397 | ||
|
bc945a9bf4 | ||
|
2c30f947f6 | ||
|
3f62a78ff8 | ||
|
cb84b97864 | ||
|
fb6604858c | ||
|
4971826b83 | ||
|
a118d57720 | ||
|
fa8d694169 | ||
|
b1061b2710 | ||
|
b751677b76 | ||
|
f1fb41f8f1 | ||
|
8353838a7e | ||
|
1e6d0e2c2a | ||
|
39f30a43ed | ||
|
656aa3b7cd | ||
|
edb1ee6571 | ||
|
f3b07ef6c5 | ||
|
29ccf7444a | ||
|
fc85e57efd | ||
|
74f6267c0b | ||
|
43ab451a06 | ||
|
3f841ebcf6 | ||
|
983dd8de9c | ||
|
9951082c68 | ||
|
15bf83a17c | ||
|
4478bccdcd | ||
|
6851a17b66 | ||
|
e4049cd161 | ||
|
b8fc99d9d6 | ||
|
80224d908c | ||
|
3635086ab7 | ||
|
3a34437396 | ||
|
7af92ba959 | ||
|
468732dd24 | ||
|
d1d88a351a | ||
|
1bf0585f28 | ||
|
d5f6f88616 | ||
|
46f97395b2 | ||
|
5b589ad6c4 | ||
|
6ae1ddc7a0 | ||
|
95a29643ee | ||
|
b279da2dbf | ||
|
17f8f07a5a | ||
|
9681bed760 | ||
|
9442e343af | ||
|
67fe309f29 | ||
|
35bb3ae328 | ||
|
145ff86e10 | ||
|
4788313936 | ||
|
85e0eee973 | ||
|
0dbe98e711 | ||
|
348249ad50 | ||
|
69f93dfde1 | ||
|
6901ae7bc8 | ||
|
9bed142374 | ||
|
54336bbdf6 | ||
|
f9a2863232 | ||
|
6217ab5179 | ||
|
99f2fac34f | ||
|
aec51432af | ||
|
e7c518ca8a | ||
|
db47070c7c | ||
|
42aa974079 | ||
|
b77b546834 | ||
|
a9ddca4142 | ||
|
8b04e95426 | ||
|
593671ebce | ||
|
5d917e88d6 | ||
|
912762d522 | ||
|
96aa9f1083 | ||
|
c4d674be3e | ||
|
7d858acbc0 | ||
|
b45e700110 | ||
|
70a3594f97 | ||
|
f62337a763 | ||
|
ee291bd628 | ||
|
4f4e12bb07 | ||
|
3bb98d08fb | ||
|
2eb80ef8c1 | ||
|
0b86ae5cdc | ||
|
005e8325ee | ||
|
dda4315b2a | ||
|
81fe2d0f16 | ||
|
743824b59b | ||
|
db1ecba438 | ||
|
7df7868501 | ||
|
261da2aeb1 | ||
|
ba42e054eb | ||
|
2f040850b3 | ||
|
ee89cd69e2 | ||
|
af90d08ca9 | ||
|
54d1d9ffd1 | ||
|
2a59c98384 | ||
|
38ac2f8443 | ||
|
952f625852 | ||
|
2efa52c474 | ||
|
41f771db58 | ||
|
439b72d20e | ||
|
c45ab0ba40 | ||
|
8e86183bbf | ||
|
e4350eda73 | ||
|
2e0f530a5c | ||
|
0de3c7d0e9 | ||
|
d019b30361 | ||
|
6d8bf4863e | ||
|
cc60684fc2 | ||
|
10be525445 | ||
|
cd8728ef7b | ||
|
f28e8e5928 | ||
|
82a12b6388 | ||
|
4692424434 | ||
|
1a4241b349 | ||
|
817ec4042d | ||
|
6284334637 | ||
|
340e40c4c1 | ||
|
8b093833d0 | ||
|
e7bfe99519 | ||
|
db39b7f72c | ||
|
2ea6dd3a85 | ||
|
7948804e02 | ||
|
b9ec2099cb | ||
|
2d3cb9cbe5 | ||
|
eb8297ede3 | ||
|
bc0b6b13f1 | ||
|
ecaa40f4cd | ||
|
2a05a0d815 | ||
|
5ffe9afc3e | ||
|
1761448f73 | ||
|
cea7e11300 | ||
|
f0890b158c | ||
|
69635867c0 | ||
|
9e067556ad | ||
|
33e1ff7473 | ||
|
3a89607268 | ||
|
22ec9f00a0 | ||
|
9f45999fcf | ||
|
1ed2cb5335 | ||
|
0c66491293 | ||
|
c612db6a54 | ||
|
718b21c0c9 | ||
|
fb5e9dce58 | ||
|
ae647137d4 | ||
|
1df9b43478 | ||
|
eff655c233 | ||
|
8df298d40f | ||
|
15f1a00fd6 | ||
|
7d3593f713 | ||
|
f0cd8a749e | ||
|
ff9c5f957d | ||
|
61676d0a17 | ||
|
afda9fd819 | ||
|
8974eaed60 | ||
|
17749fb761 | ||
|
ff43b5ea55 | ||
|
6090d459c8 | ||
|
ceb498da31 | ||
|
f42710517a | ||
|
a0a0356992 | ||
|
829a938249 | ||
|
495d863235 | ||
|
2b9ff6f7d0 | ||
|
7dd9a14353 | ||
|
7b52a63bb1 |
14
.gitmodules
vendored
14
.gitmodules
vendored
@@ -1,12 +1,12 @@
|
||||
[submodule "roms/seabios"]
|
||||
path = roms/seabios
|
||||
url = https://gitlab.com/qemu-project/seabios.git/
|
||||
url = https://gitlab.suse.de/virtualization/qemu-seabios.git
|
||||
[submodule "roms/SLOF"]
|
||||
path = roms/SLOF
|
||||
url = https://gitlab.com/qemu-project/SLOF.git
|
||||
[submodule "roms/ipxe"]
|
||||
path = roms/ipxe
|
||||
url = https://gitlab.com/qemu-project/ipxe.git
|
||||
url = https://gitlab.suse.de/virtualization/qemu-ipxe.git
|
||||
[submodule "roms/openbios"]
|
||||
path = roms/openbios
|
||||
url = https://gitlab.com/qemu-project/openbios.git
|
||||
@@ -15,7 +15,7 @@
|
||||
url = https://gitlab.com/qemu-project/qemu-palcode.git
|
||||
[submodule "roms/sgabios"]
|
||||
path = roms/sgabios
|
||||
url = https://gitlab.com/qemu-project/sgabios.git
|
||||
url = https://gitlab.suse.de/virtualization/qemu-sgabios.git
|
||||
[submodule "dtc"]
|
||||
path = dtc
|
||||
url = https://gitlab.com/qemu-project/dtc.git
|
||||
@@ -24,7 +24,7 @@
|
||||
url = https://gitlab.com/qemu-project/u-boot.git
|
||||
[submodule "roms/skiboot"]
|
||||
path = roms/skiboot
|
||||
url = https://gitlab.com/qemu-project/skiboot.git
|
||||
url = https://gitlab.suse.de/virtualization/qemu-skiboot.git
|
||||
[submodule "roms/QemuMacDrivers"]
|
||||
path = roms/QemuMacDrivers
|
||||
url = https://gitlab.com/qemu-project/QemuMacDrivers.git
|
||||
@@ -45,13 +45,13 @@
|
||||
url = https://gitlab.com/qemu-project/berkeley-softfloat-3.git
|
||||
[submodule "roms/edk2"]
|
||||
path = roms/edk2
|
||||
url = https://gitlab.com/qemu-project/edk2.git
|
||||
url = https://gitlab.suse.de/virtualization/qemu-edk2.git
|
||||
[submodule "roms/opensbi"]
|
||||
path = roms/opensbi
|
||||
url = https://gitlab.com/qemu-project/opensbi.git
|
||||
url = https://gitlab.com/qemu-project/qemu-opensbi.git
|
||||
[submodule "roms/qboot"]
|
||||
path = roms/qboot
|
||||
url = https://gitlab.com/qemu-project/qboot.git
|
||||
url = https://gitlab.suse.de/virtualization/qemu-qboot.git
|
||||
[submodule "meson"]
|
||||
path = meson
|
||||
url = https://gitlab.com/qemu-project/meson.git
|
||||
|
@@ -46,6 +46,8 @@
|
||||
#include "sysemu/hw_accel.h"
|
||||
#include "kvm-cpus.h"
|
||||
#include "sysemu/dirtylimit.h"
|
||||
#include "migration/migration.h"
|
||||
#include "migration/misc.h"
|
||||
|
||||
#include "hw/boards.h"
|
||||
#include "monitor/stats.h"
|
||||
@@ -101,6 +103,8 @@ bool kvm_direct_msi_allowed;
|
||||
bool kvm_ioeventfd_any_length_allowed;
|
||||
bool kvm_msi_use_devid;
|
||||
bool kvm_has_guest_debug;
|
||||
bool kvm_ram_default_shared;
|
||||
int kvm_vm_type;
|
||||
static int kvm_sstep_flags;
|
||||
static bool kvm_immediate_exit;
|
||||
static hwaddr kvm_max_slot_size = ~0;
|
||||
@@ -286,34 +290,42 @@ int kvm_physical_memory_addr_from_host(KVMState *s, void *ram,
|
||||
static int kvm_set_user_memory_region(KVMMemoryListener *kml, KVMSlot *slot, bool new)
|
||||
{
|
||||
KVMState *s = kvm_state;
|
||||
struct kvm_userspace_memory_region mem;
|
||||
struct kvm_userspace_memory_region_ext mem;
|
||||
int ret;
|
||||
|
||||
mem.slot = slot->slot | (kml->as_id << 16);
|
||||
mem.guest_phys_addr = slot->start_addr;
|
||||
mem.userspace_addr = (unsigned long)slot->ram;
|
||||
mem.flags = slot->flags;
|
||||
mem.region.slot = slot->slot | (kml->as_id << 16);
|
||||
mem.region.guest_phys_addr = slot->start_addr;
|
||||
mem.region.userspace_addr = (unsigned long)slot->ram;
|
||||
mem.region.flags = slot->flags;
|
||||
mem.restricted_fd = slot->fd;
|
||||
mem.restricted_offset = slot->ofs;
|
||||
|
||||
if (slot->memory_size && !new && (mem.flags ^ slot->old_flags) & KVM_MEM_READONLY) {
|
||||
if (slot->memory_size && !new && (slot->flags ^ slot->old_flags) & KVM_MEM_READONLY) {
|
||||
/* Set the slot size to 0 before setting the slot to the desired
|
||||
* value. This is needed based on KVM commit 75d61fbc. */
|
||||
mem.memory_size = 0;
|
||||
mem.region.memory_size = 0;
|
||||
ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
|
||||
if (ret < 0) {
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
mem.memory_size = slot->memory_size;
|
||||
mem.region.memory_size = slot->memory_size;
|
||||
ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
|
||||
slot->old_flags = mem.flags;
|
||||
slot->old_flags = mem.region.flags;
|
||||
err:
|
||||
trace_kvm_set_user_memory(mem.slot, mem.flags, mem.guest_phys_addr,
|
||||
mem.memory_size, mem.userspace_addr, ret);
|
||||
trace_kvm_set_user_memory(mem.region.slot >> 16, (uint16_t)mem.region.slot,
|
||||
mem.region.flags, mem.region.guest_phys_addr,
|
||||
mem.region.memory_size,
|
||||
mem.region.userspace_addr, ret);
|
||||
if (ret < 0) {
|
||||
error_report("%s: KVM_SET_USER_MEMORY_REGION failed, slot=%d,"
|
||||
" start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s",
|
||||
__func__, mem.slot, slot->start_addr,
|
||||
(uint64_t)mem.memory_size, strerror(errno));
|
||||
" start=0x%" PRIx64 ", size=0x%" PRIx64 ","
|
||||
" flags=0x%" PRIx32 ","
|
||||
" restricted_fd=%" PRId32 ", restricted_offset=0x%" PRIx64 ": %s",
|
||||
__func__, mem.region.slot, slot->start_addr,
|
||||
(uint64_t)mem.region.memory_size, mem.region.flags,
|
||||
mem.restricted_fd, (uint64_t)mem.restricted_offset,
|
||||
strerror(errno));
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
@@ -385,6 +397,11 @@ static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id)
|
||||
return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id);
|
||||
}
|
||||
|
||||
int __attribute__ ((weak)) kvm_arch_pre_create_vcpu(CPUState *cpu)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_init_vcpu(CPUState *cpu, Error **errp)
|
||||
{
|
||||
KVMState *s = kvm_state;
|
||||
@@ -393,15 +410,29 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
|
||||
|
||||
trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
|
||||
|
||||
/*
|
||||
* tdx_pre_create_vcpu() may call cpu_x86_cpuid(). It in turn may call
|
||||
* kvm_vm_ioctl(). Set cpu->kvm_state in advance to avoid NULL pointer
|
||||
* dereference.
|
||||
*/
|
||||
cpu->kvm_state = s;
|
||||
ret = kvm_arch_pre_create_vcpu(cpu);
|
||||
if (ret < 0) {
|
||||
error_setg_errno(errp, -ret,
|
||||
"kvm_init_vcpu: kvm_arch_pre_create_vcpu() failed");
|
||||
cpu->kvm_state = NULL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu));
|
||||
if (ret < 0) {
|
||||
error_setg_errno(errp, -ret, "kvm_init_vcpu: kvm_get_vcpu failed (%lu)",
|
||||
kvm_arch_vcpu_id(cpu));
|
||||
cpu->kvm_state = NULL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
cpu->kvm_fd = ret;
|
||||
cpu->kvm_state = s;
|
||||
cpu->vcpu_dirty = true;
|
||||
cpu->dirty_pages = 0;
|
||||
cpu->throttle_us_per_full = 0;
|
||||
@@ -467,6 +498,9 @@ static int kvm_mem_flags(MemoryRegion *mr)
|
||||
if (readonly && kvm_readonly_mem_allowed) {
|
||||
flags |= KVM_MEM_READONLY;
|
||||
}
|
||||
if (mr->ram_block && mr->ram_block->restricted_fd > 0) {
|
||||
flags |= KVM_MEM_PRIVATE;
|
||||
}
|
||||
return flags;
|
||||
}
|
||||
|
||||
@@ -976,7 +1010,8 @@ static int kvm_physical_log_clear(KVMMemoryListener *kml,
|
||||
KVMSlot *mem;
|
||||
int ret = 0, i;
|
||||
|
||||
if (!s->manual_dirty_log_protect) {
|
||||
if (!s->manual_dirty_log_protect ||
|
||||
runstate_check(RUN_STATE_FINISH_MIGRATE)) {
|
||||
/* No need to do explicit clear */
|
||||
return ret;
|
||||
}
|
||||
@@ -1384,6 +1419,9 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
|
||||
mem->ram_start_offset = ram_start_offset;
|
||||
mem->ram = ram;
|
||||
mem->flags = kvm_mem_flags(mr);
|
||||
mem->fd = mr->ram_block->restricted_fd;
|
||||
mem->ofs = (uint8_t*)ram - mr->ram_block->host;
|
||||
|
||||
kvm_slot_init_dirty_bitmap(mem);
|
||||
err = kvm_set_user_memory_region(kml, mem, true);
|
||||
if (err) {
|
||||
@@ -1391,6 +1429,25 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
|
||||
strerror(-err));
|
||||
abort();
|
||||
}
|
||||
|
||||
/* If TDX VM, make the low mem (0~2GB) private. */
|
||||
if (!kvm_ram_default_shared &&
|
||||
object_dynamic_cast(OBJECT(MACHINE(qdev_get_machine())->cgs),
|
||||
"tdx-guest") &&
|
||||
memory_region_can_be_private(mr)) {
|
||||
if (start_addr == 0) {
|
||||
hwaddr convert_size = 2UL << 30;
|
||||
if (convert_size > slot_size)
|
||||
convert_size = slot_size;
|
||||
err = kvm_encrypt_reg_region(start_addr, convert_size, true);
|
||||
if (err) {
|
||||
fprintf(stderr, "%s: error converting slot to private: %s\n",
|
||||
__func__, strerror(-err));
|
||||
abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
start_addr += slot_size;
|
||||
ram_start_offset += slot_size;
|
||||
ram += slot_size;
|
||||
@@ -2359,6 +2416,7 @@ static int kvm_init(MachineState *ms)
|
||||
} else if (mc->kvm_type) {
|
||||
type = mc->kvm_type(ms, NULL);
|
||||
}
|
||||
kvm_vm_type = type;
|
||||
|
||||
do {
|
||||
ret = kvm_ioctl(s, KVM_CREATE_VM, type);
|
||||
@@ -2808,6 +2866,80 @@ static void kvm_eat_signals(CPUState *cpu)
|
||||
} while (sigismember(&chkset, SIG_IPI));
|
||||
}
|
||||
|
||||
int kvm_encrypt_reg_region(hwaddr start, hwaddr size, bool reg_region)
|
||||
{
|
||||
int r;
|
||||
struct kvm_memory_attributes attr;
|
||||
attr.attributes = reg_region ? KVM_MEMORY_ATTRIBUTE_PRIVATE : 0;
|
||||
|
||||
attr.address = start;
|
||||
attr.size = size;
|
||||
attr.flags = 0;
|
||||
|
||||
r = kvm_vm_ioctl(kvm_state, KVM_SET_MEMORY_ATTRIBUTES, &attr);
|
||||
if (r || attr.size != 0) {
|
||||
warn_report("%s: failed to set memory attr (0x%lx+%#zx) error '%s'",
|
||||
__func__, start, size, strerror(errno));
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
static bool is_postcopy_private_fault(RAMBlock *rb, ram_addr_t offset,
|
||||
bool shared_to_private)
|
||||
{
|
||||
unsigned long bit = offset >> TARGET_PAGE_BITS;
|
||||
|
||||
if (!shared_to_private || !migration_in_incoming_postcopy()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return !test_bit(bit, rb->receivedmap);
|
||||
}
|
||||
|
||||
int kvm_convert_memory(hwaddr start, hwaddr size,
|
||||
bool shared_to_private, int cpu_index)
|
||||
{
|
||||
MemoryRegionSection section;
|
||||
void *addr;
|
||||
RAMBlock *rb;
|
||||
ram_addr_t offset;
|
||||
int ret = -1;
|
||||
|
||||
trace_kvm_convert_memory(start, size, shared_to_private ? "shared_to_private" : "private_to_shared");
|
||||
section = memory_region_find(get_system_memory(), start, size);
|
||||
if (!section.mr) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (memory_region_can_be_private(section.mr)) {
|
||||
addr = memory_region_get_ram_ptr(section.mr) +
|
||||
section.offset_within_region;
|
||||
rb = qemu_ram_block_from_host(addr, false, &offset);
|
||||
if (is_postcopy_private_fault(rb, offset, shared_to_private)) {
|
||||
assert(cpu_index != INT_MAX);
|
||||
postcopy_add_private_fault_to_pending_list(rb, offset,
|
||||
start, cpu_index);
|
||||
ret = kvm_encrypt_reg_region(start, size, shared_to_private);
|
||||
} else {
|
||||
ret = kvm_encrypt_reg_region(start, size, shared_to_private);
|
||||
|
||||
/*
|
||||
* With KVM_MEMORY_(UN)ENCRYPT_REG_REGION by kvm_encrypt_reg_region(),
|
||||
* operation on underlying file descriptor is only for releasing
|
||||
* unnecessary pages.
|
||||
*/
|
||||
memory_region_convert_mem_attr(§ion, !shared_to_private);
|
||||
(void)ram_block_convert_range(rb, offset, size, shared_to_private);
|
||||
}
|
||||
} else {
|
||||
warn_report("Unknown start 0x%"HWADDR_PRIx" size 0x%"HWADDR_PRIx" shared_to_private %d",
|
||||
start, size, shared_to_private);
|
||||
}
|
||||
|
||||
memory_region_unref(section.mr);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int kvm_cpu_exec(CPUState *cpu)
|
||||
{
|
||||
struct kvm_run *run = cpu->kvm_run;
|
||||
@@ -2966,6 +3098,11 @@ int kvm_cpu_exec(CPUState *cpu)
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case KVM_EXIT_MEMORY_FAULT:
|
||||
ret = kvm_convert_memory(run->memory.gpa, run->memory.size,
|
||||
run->memory.flags & KVM_MEMORY_EXIT_FLAG_PRIVATE,
|
||||
cpu->cpu_index);
|
||||
break;
|
||||
default:
|
||||
DPRINTF("kvm_arch_handle_exit\n");
|
||||
ret = kvm_arch_handle_exit(cpu, run);
|
||||
@@ -3556,6 +3693,21 @@ static void kvm_set_kernel_irqchip(Object *obj, Visitor *v,
|
||||
}
|
||||
}
|
||||
|
||||
void kvm_setup_set_memory_region_debug_ops(struct KVMState *s,
|
||||
set_memory_region_debug_ops new_ops)
|
||||
{
|
||||
if (s)
|
||||
s->set_mr_debug_ops = new_ops;
|
||||
}
|
||||
|
||||
void kvm_set_memory_region_debug_ops(void *handle, MemoryRegion *mr)
|
||||
{
|
||||
if (!kvm_state || !kvm_state->set_mr_debug_ops)
|
||||
return;
|
||||
|
||||
kvm_state->set_mr_debug_ops(handle, mr);
|
||||
}
|
||||
|
||||
bool kvm_kernel_irqchip_allowed(void)
|
||||
{
|
||||
return kvm_state->kernel_irqchip_allowed;
|
||||
|
@@ -15,7 +15,7 @@ kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d"
|
||||
kvm_irqchip_release_virq(int virq) "virq %d"
|
||||
kvm_set_ioeventfd_mmio(int fd, uint64_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%" PRIx64 " val=0x%x assign: %d size: %d match: %d"
|
||||
kvm_set_ioeventfd_pio(int fd, uint16_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%x val=0x%x assign: %d size: %d match: %d"
|
||||
kvm_set_user_memory(uint32_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, int ret) "Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " ret=%d"
|
||||
kvm_set_user_memory(uint16_t as, uint16_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, int ret) "AddrSpace#%d Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " ret=%d"
|
||||
kvm_clear_dirty_log(uint32_t slot, uint64_t start, uint32_t size) "slot#%"PRId32" start 0x%"PRIx64" size 0x%"PRIx32
|
||||
kvm_resample_fd_notify(int gsi) "gsi %d"
|
||||
kvm_dirty_ring_full(int id) "vcpu %d"
|
||||
@@ -25,4 +25,4 @@ kvm_dirty_ring_reaper(const char *s) "%s"
|
||||
kvm_dirty_ring_reap(uint64_t count, int64_t t) "reaped %"PRIu64" pages (took %"PRIi64" us)"
|
||||
kvm_dirty_ring_reaper_kick(const char *reason) "%s"
|
||||
kvm_dirty_ring_flush(int finished) "%d"
|
||||
|
||||
kvm_convert_memory(uint64_t start, uint64_t size, const char *msg) "start 0x%" PRIx64 " size 0x%" PRIx64 " %s"
|
||||
|
@@ -132,3 +132,14 @@ uint32_t kvm_dirty_ring_size(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kvm_setup_set_memory_region_debug_ops(struct KVMState *s,
|
||||
set_memory_region_debug_ops new_ops)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
void kvm_set_memory_region_debug_ops(void *handle, MemoryRegion *mr)
|
||||
{
|
||||
|
||||
}
|
||||
|
@@ -20,8 +20,24 @@ OBJECT_DEFINE_ABSTRACT_TYPE(ConfidentialGuestSupport,
|
||||
CONFIDENTIAL_GUEST_SUPPORT,
|
||||
OBJECT)
|
||||
|
||||
static bool cgs_get_disable_pv_clock(Object *obj, Error **errp)
|
||||
{
|
||||
ConfidentialGuestSupport *cgs = CONFIDENTIAL_GUEST_SUPPORT(obj);
|
||||
|
||||
return cgs->disable_pv_clock;
|
||||
}
|
||||
|
||||
static void cgs_set_disable_pv_clock(Object *obj, bool value, Error **errp)
|
||||
{
|
||||
ConfidentialGuestSupport *cgs = CONFIDENTIAL_GUEST_SUPPORT(obj);
|
||||
|
||||
cgs->disable_pv_clock = value;
|
||||
}
|
||||
|
||||
static void confidential_guest_support_class_init(ObjectClass *oc, void *data)
|
||||
{
|
||||
object_class_property_add_bool(oc, CONFIDENTIAL_GUEST_SUPPORT_DISABLE_PV_CLOCK,
|
||||
cgs_get_disable_pv_clock, cgs_set_disable_pv_clock);
|
||||
}
|
||||
|
||||
static void confidential_guest_support_init(Object *obj)
|
||||
|
@@ -38,7 +38,7 @@ sgx_epc_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
|
||||
|
||||
name = object_get_canonical_path(OBJECT(backend));
|
||||
ram_flags = (backend->share ? RAM_SHARED : 0) | RAM_PROTECTED;
|
||||
memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend),
|
||||
memory_region_init_ram_from_fd(backend->mr, OBJECT(backend),
|
||||
name, backend->size, ram_flags,
|
||||
fd, 0, errp);
|
||||
g_free(name);
|
||||
|
@@ -56,7 +56,7 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
|
||||
ram_flags = backend->share ? RAM_SHARED : 0;
|
||||
ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
|
||||
ram_flags |= fb->is_pmem ? RAM_PMEM : 0;
|
||||
memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), name,
|
||||
memory_region_init_ram_from_file(backend->mr, OBJECT(backend), name,
|
||||
backend->size, fb->align, ram_flags,
|
||||
fb->mem_path, fb->readonly, errp);
|
||||
g_free(name);
|
||||
@@ -174,8 +174,8 @@ static void file_backend_unparent(Object *obj)
|
||||
HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(obj);
|
||||
|
||||
if (host_memory_backend_mr_inited(backend) && fb->discard_data) {
|
||||
void *ptr = memory_region_get_ram_ptr(&backend->mr);
|
||||
uint64_t sz = memory_region_size(&backend->mr);
|
||||
void *ptr = memory_region_get_ram_ptr(backend->mr);
|
||||
uint64_t sz = memory_region_size(backend->mr);
|
||||
|
||||
qemu_madvise(ptr, sz, QEMU_MADV_REMOVE);
|
||||
}
|
||||
|
210
backends/hostmem-memfd-private.c
Normal file
210
backends/hostmem-memfd-private.c
Normal file
@@ -0,0 +1,210 @@
|
||||
/*
|
||||
* QEMU host private memfd memory backend
|
||||
*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* Authors:
|
||||
* Chao Peng <chao.p.peng@linux.intel.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2 or later.
|
||||
* See the COPYING file in the top-level directory.
|
||||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "sysemu/hostmem.h"
|
||||
#include "qom/object_interfaces.h"
|
||||
#include "qemu/memfd.h"
|
||||
#include "qemu/module.h"
|
||||
#include "qapi/error.h"
|
||||
#include "qom/object.h"
|
||||
|
||||
#define TYPE_MEMORY_BACKEND_MEMFD_PRIVATE "memory-backend-memfd-private"
|
||||
|
||||
OBJECT_DECLARE_SIMPLE_TYPE(HostMemoryBackendPrivateMemfd,
|
||||
MEMORY_BACKEND_MEMFD_PRIVATE)
|
||||
|
||||
|
||||
struct HostMemoryBackendPrivateMemfd {
|
||||
HostMemoryBackend parent_obj;
|
||||
HostMemoryBackend *shmem;
|
||||
|
||||
bool hugetlb;
|
||||
uint64_t hugetlbsize;
|
||||
char *path;
|
||||
};
|
||||
|
||||
static void
|
||||
priv_memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
|
||||
{
|
||||
HostMemoryBackendPrivateMemfd *m = MEMORY_BACKEND_MEMFD_PRIVATE(backend);
|
||||
uint32_t ram_flags;
|
||||
char *name;
|
||||
int fd, priv_fd;
|
||||
unsigned int flags;
|
||||
int mount_fd;
|
||||
|
||||
if (!backend->size) {
|
||||
error_setg(errp, "can't create backend with size 0");
|
||||
return;
|
||||
}
|
||||
|
||||
if (m->shmem) {
|
||||
assert(m->shmem->mr);
|
||||
backend->mr = m->shmem->mr;
|
||||
} else {
|
||||
fd = qemu_memfd_create("memory-backend-memfd-shared", backend->size,
|
||||
m->hugetlb, m->hugetlbsize, 0, errp);
|
||||
if (fd == -1) {
|
||||
return;
|
||||
}
|
||||
|
||||
name = host_memory_backend_get_name(backend);
|
||||
ram_flags = backend->share ? RAM_SHARED : 0;
|
||||
ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
|
||||
memory_region_init_ram_from_fd(backend->mr, OBJECT(backend), name,
|
||||
backend->size, ram_flags, fd, 0, errp);
|
||||
g_free(name);
|
||||
}
|
||||
|
||||
flags = 0;
|
||||
mount_fd = -1;
|
||||
if (m->path) {
|
||||
flags = RMFD_USERMNT;
|
||||
mount_fd = open_tree(AT_FDCWD, m->path, OPEN_TREE_CLOEXEC);
|
||||
if (mount_fd == -1) {
|
||||
error_setg(errp, "open_tree() failed at %s: %s",
|
||||
m->path, strerror(errno));
|
||||
return;
|
||||
}
|
||||
}
|
||||
priv_fd = qemu_memfd_restricted(backend->size, flags, mount_fd, errp);
|
||||
if (mount_fd >= 0) {
|
||||
close(mount_fd);
|
||||
}
|
||||
if (priv_fd == -1) {
|
||||
return;
|
||||
}
|
||||
|
||||
memory_region_set_restricted_fd(backend->mr, priv_fd);
|
||||
ram_block_alloc_cgs_bitmap(backend->mr->ram_block);
|
||||
}
|
||||
|
||||
static bool
|
||||
priv_memfd_backend_get_hugetlb(Object *o, Error **errp)
|
||||
{
|
||||
return MEMORY_BACKEND_MEMFD_PRIVATE(o)->hugetlb;
|
||||
}
|
||||
|
||||
static void
|
||||
priv_memfd_backend_set_hugetlb(Object *o, bool value, Error **errp)
|
||||
{
|
||||
MEMORY_BACKEND_MEMFD_PRIVATE(o)->hugetlb = value;
|
||||
}
|
||||
|
||||
static void
|
||||
priv_memfd_backend_set_hugetlbsize(Object *obj, Visitor *v, const char *name,
|
||||
void *opaque, Error **errp)
|
||||
{
|
||||
HostMemoryBackendPrivateMemfd *m = MEMORY_BACKEND_MEMFD_PRIVATE(obj);
|
||||
uint64_t value;
|
||||
|
||||
if (host_memory_backend_mr_inited(MEMORY_BACKEND(obj))) {
|
||||
error_setg(errp, "cannot change property value");
|
||||
return;
|
||||
}
|
||||
|
||||
if (!visit_type_size(v, name, &value, errp)) {
|
||||
return;
|
||||
}
|
||||
if (!value) {
|
||||
error_setg(errp, "Property '%s.%s' doesn't take value '%" PRIu64 "'",
|
||||
object_get_typename(obj), name, value);
|
||||
return;
|
||||
}
|
||||
m->hugetlbsize = value;
|
||||
}
|
||||
|
||||
static void
|
||||
priv_memfd_backend_get_hugetlbsize(Object *obj, Visitor *v, const char *name,
|
||||
void *opaque, Error **errp)
|
||||
{
|
||||
HostMemoryBackendPrivateMemfd *m = MEMORY_BACKEND_MEMFD_PRIVATE(obj);
|
||||
uint64_t value = m->hugetlbsize;
|
||||
|
||||
visit_type_size(v, name, &value, errp);
|
||||
}
|
||||
|
||||
static char *priv_memfd_backend_get_path(Object *obj, Error **errp)
|
||||
{
|
||||
HostMemoryBackendPrivateMemfd *m = MEMORY_BACKEND_MEMFD_PRIVATE(obj);
|
||||
|
||||
return g_strdup(m->path);
|
||||
}
|
||||
|
||||
static void priv_memfd_backend_set_path(Object *obj, const char *value, Error **errp)
|
||||
{
|
||||
HostMemoryBackendPrivateMemfd *m = MEMORY_BACKEND_MEMFD_PRIVATE(obj);
|
||||
|
||||
g_free(m->path);
|
||||
m->path = g_strdup(value);
|
||||
}
|
||||
|
||||
static void
|
||||
priv_memfd_backend_instance_init(Object *obj)
|
||||
{
|
||||
MEMORY_BACKEND(obj)->reserve = false;
|
||||
}
|
||||
|
||||
static void
|
||||
priv_memfd_backend_class_init(ObjectClass *oc, void *data)
|
||||
{
|
||||
HostMemoryBackendClass *bc = MEMORY_BACKEND_CLASS(oc);
|
||||
|
||||
bc->alloc = priv_memfd_backend_memory_alloc;
|
||||
|
||||
object_class_property_add_str(oc, "path",
|
||||
priv_memfd_backend_get_path,
|
||||
priv_memfd_backend_set_path);
|
||||
object_class_property_set_description(oc, "path",
|
||||
"path to mount point of shmfs");
|
||||
|
||||
object_class_property_add_link(oc,
|
||||
"shmemdev",
|
||||
TYPE_MEMORY_BACKEND,
|
||||
offsetof(HostMemoryBackendPrivateMemfd, shmem),
|
||||
object_property_allow_set_link,
|
||||
OBJ_PROP_LINK_STRONG);
|
||||
object_class_property_set_description(oc, "shmemdev",
|
||||
"memory backend for shared memory");
|
||||
|
||||
if (qemu_memfd_check(MFD_HUGETLB)) {
|
||||
object_class_property_add_bool(oc, "hugetlb",
|
||||
priv_memfd_backend_get_hugetlb,
|
||||
priv_memfd_backend_set_hugetlb);
|
||||
object_class_property_set_description(oc, "hugetlb",
|
||||
"Use huge pages");
|
||||
object_class_property_add(oc, "hugetlbsize", "int",
|
||||
priv_memfd_backend_get_hugetlbsize,
|
||||
priv_memfd_backend_set_hugetlbsize,
|
||||
NULL, NULL);
|
||||
object_class_property_set_description(oc, "hugetlbsize",
|
||||
"Huge pages size (ex: 2M, 1G)");
|
||||
}
|
||||
}
|
||||
|
||||
static const TypeInfo priv_memfd_backend_info = {
|
||||
.name = TYPE_MEMORY_BACKEND_MEMFD_PRIVATE,
|
||||
.parent = TYPE_MEMORY_BACKEND,
|
||||
.instance_init = priv_memfd_backend_instance_init,
|
||||
.class_init = priv_memfd_backend_class_init,
|
||||
.instance_size = sizeof(HostMemoryBackendPrivateMemfd),
|
||||
};
|
||||
|
||||
static void register_types(void)
|
||||
{
|
||||
if (qemu_memfd_check(MFD_ALLOW_SEALING)) {
|
||||
type_register_static(&priv_memfd_backend_info);
|
||||
}
|
||||
}
|
||||
|
||||
type_init(register_types);
|
@@ -55,7 +55,7 @@ memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
|
||||
name = host_memory_backend_get_name(backend);
|
||||
ram_flags = backend->share ? RAM_SHARED : 0;
|
||||
ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
|
||||
memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), name,
|
||||
memory_region_init_ram_from_fd(backend->mr, OBJECT(backend), name,
|
||||
backend->size, ram_flags, fd, 0, errp);
|
||||
g_free(name);
|
||||
}
|
||||
|
@@ -30,7 +30,7 @@ ram_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
|
||||
name = host_memory_backend_get_name(backend);
|
||||
ram_flags = backend->share ? RAM_SHARED : 0;
|
||||
ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
|
||||
memory_region_init_ram_flags_nomigrate(&backend->mr, OBJECT(backend), name,
|
||||
memory_region_init_ram_flags_nomigrate(backend->mr, OBJECT(backend), name,
|
||||
backend->size, ram_flags, errp);
|
||||
g_free(name);
|
||||
}
|
||||
|
@@ -169,8 +169,8 @@ static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp)
|
||||
}
|
||||
|
||||
if (value != backend->merge) {
|
||||
void *ptr = memory_region_get_ram_ptr(&backend->mr);
|
||||
uint64_t sz = memory_region_size(&backend->mr);
|
||||
void *ptr = memory_region_get_ram_ptr(backend->mr);
|
||||
uint64_t sz = memory_region_size(backend->mr);
|
||||
|
||||
qemu_madvise(ptr, sz,
|
||||
value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE);
|
||||
@@ -195,8 +195,8 @@ static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp)
|
||||
}
|
||||
|
||||
if (value != backend->dump) {
|
||||
void *ptr = memory_region_get_ram_ptr(&backend->mr);
|
||||
uint64_t sz = memory_region_size(&backend->mr);
|
||||
void *ptr = memory_region_get_ram_ptr(backend->mr);
|
||||
uint64_t sz = memory_region_size(backend->mr);
|
||||
|
||||
qemu_madvise(ptr, sz,
|
||||
value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP);
|
||||
@@ -228,9 +228,9 @@ static void host_memory_backend_set_prealloc(Object *obj, bool value,
|
||||
}
|
||||
|
||||
if (value && !backend->prealloc) {
|
||||
int fd = memory_region_get_fd(&backend->mr);
|
||||
void *ptr = memory_region_get_ram_ptr(&backend->mr);
|
||||
uint64_t sz = memory_region_size(&backend->mr);
|
||||
int fd = memory_region_get_fd(backend->mr);
|
||||
void *ptr = memory_region_get_ram_ptr(backend->mr);
|
||||
uint64_t sz = memory_region_size(backend->mr);
|
||||
|
||||
qemu_prealloc_mem(fd, ptr, sz, backend->prealloc_threads,
|
||||
backend->prealloc_context, &local_err);
|
||||
@@ -271,6 +271,8 @@ static void host_memory_backend_init(Object *obj)
|
||||
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
|
||||
MachineState *machine = MACHINE(qdev_get_machine());
|
||||
|
||||
backend->mr = &backend->base_mr;
|
||||
|
||||
/* TODO: convert access to globals to compat properties */
|
||||
backend->merge = machine_mem_merge(machine);
|
||||
backend->dump = machine_dump_guest_core(machine);
|
||||
@@ -289,12 +291,12 @@ bool host_memory_backend_mr_inited(HostMemoryBackend *backend)
|
||||
* NOTE: We forbid zero-length memory backend, so here zero means
|
||||
* "we haven't inited the backend memory region yet".
|
||||
*/
|
||||
return memory_region_size(&backend->mr) != 0;
|
||||
return memory_region_size(backend->mr) != 0;
|
||||
}
|
||||
|
||||
MemoryRegion *host_memory_backend_get_memory(HostMemoryBackend *backend)
|
||||
{
|
||||
return host_memory_backend_mr_inited(backend) ? &backend->mr : NULL;
|
||||
return host_memory_backend_mr_inited(backend) ? backend->mr : NULL;
|
||||
}
|
||||
|
||||
void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped)
|
||||
@@ -309,7 +311,7 @@ bool host_memory_backend_is_mapped(HostMemoryBackend *backend)
|
||||
|
||||
size_t host_memory_backend_pagesize(HostMemoryBackend *memdev)
|
||||
{
|
||||
size_t pagesize = qemu_ram_pagesize(memdev->mr.ram_block);
|
||||
size_t pagesize = qemu_ram_pagesize(memdev->mr->ram_block);
|
||||
g_assert(pagesize >= qemu_real_host_page_size());
|
||||
return pagesize;
|
||||
}
|
||||
@@ -329,8 +331,8 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
|
||||
goto out;
|
||||
}
|
||||
|
||||
ptr = memory_region_get_ram_ptr(&backend->mr);
|
||||
sz = memory_region_size(&backend->mr);
|
||||
ptr = memory_region_get_ram_ptr(backend->mr);
|
||||
sz = memory_region_size(backend->mr);
|
||||
|
||||
if (backend->merge) {
|
||||
qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE);
|
||||
@@ -384,7 +386,7 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
|
||||
* specified NUMA policy in place.
|
||||
*/
|
||||
if (backend->prealloc) {
|
||||
qemu_prealloc_mem(memory_region_get_fd(&backend->mr), ptr, sz,
|
||||
qemu_prealloc_mem(memory_region_get_fd(backend->mr), ptr, sz,
|
||||
backend->prealloc_threads,
|
||||
backend->prealloc_context, &local_err);
|
||||
if (local_err) {
|
||||
|
@@ -12,6 +12,7 @@ softmmu_ss.add([files(
|
||||
softmmu_ss.add(when: 'CONFIG_POSIX', if_true: files('rng-random.c'))
|
||||
softmmu_ss.add(when: 'CONFIG_POSIX', if_true: files('hostmem-file.c'))
|
||||
softmmu_ss.add(when: 'CONFIG_LINUX', if_true: files('hostmem-memfd.c'))
|
||||
softmmu_ss.add(when: 'CONFIG_LINUX', if_true: files('hostmem-memfd-private.c'))
|
||||
if keyutils.found()
|
||||
softmmu_ss.add(keyutils, files('cryptodev-lkcf.c'))
|
||||
endif
|
||||
|
44
block/curl.c
44
block/curl.c
@@ -37,8 +37,15 @@
|
||||
|
||||
// #define DEBUG_VERBOSE
|
||||
|
||||
/* CURL 7.85.0 switches to a string based API for specifying
|
||||
* the desired protocols.
|
||||
*/
|
||||
#if LIBCURL_VERSION_NUM >= 0x075500
|
||||
#define PROTOCOLS "HTTP,HTTPS,FTP,FTPS"
|
||||
#else
|
||||
#define PROTOCOLS (CURLPROTO_HTTP | CURLPROTO_HTTPS | \
|
||||
CURLPROTO_FTP | CURLPROTO_FTPS)
|
||||
#endif
|
||||
|
||||
#define CURL_NUM_STATES 8
|
||||
#define CURL_NUM_ACB 8
|
||||
@@ -509,9 +516,18 @@ static int curl_init_state(BDRVCURLState *s, CURLState *state)
|
||||
* obscure protocols. For example, do not allow POP3/SMTP/IMAP see
|
||||
* CVE-2013-0249.
|
||||
*
|
||||
* Restricting protocols is only supported from 7.19.4 upwards.
|
||||
* Restricting protocols is only supported from 7.19.4 upwards. Note:
|
||||
* version 7.85.0 deprecates CURLOPT_*PROTOCOLS in favour of a string
|
||||
* based CURLOPT_*PROTOCOLS_STR API.
|
||||
*/
|
||||
#if LIBCURL_VERSION_NUM >= 0x071304
|
||||
#if LIBCURL_VERSION_NUM >= 0x075500
|
||||
if (curl_easy_setopt(state->curl,
|
||||
CURLOPT_PROTOCOLS_STR, PROTOCOLS) ||
|
||||
curl_easy_setopt(state->curl,
|
||||
CURLOPT_REDIR_PROTOCOLS_STR, PROTOCOLS)) {
|
||||
goto err;
|
||||
}
|
||||
#elif LIBCURL_VERSION_NUM >= 0x071304
|
||||
if (curl_easy_setopt(state->curl, CURLOPT_PROTOCOLS, PROTOCOLS) ||
|
||||
curl_easy_setopt(state->curl, CURLOPT_REDIR_PROTOCOLS, PROTOCOLS)) {
|
||||
goto err;
|
||||
@@ -669,7 +685,12 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
const char *file;
|
||||
const char *cookie;
|
||||
const char *cookie_secret;
|
||||
double d;
|
||||
/* CURL >= 7.55.0 uses curl_off_t for content length instead of a double */
|
||||
#if LIBCURL_VERSION_NUM >= 0x073700
|
||||
curl_off_t cl;
|
||||
#else
|
||||
double cl;
|
||||
#endif
|
||||
const char *secretid;
|
||||
const char *protocol_delimiter;
|
||||
int ret;
|
||||
@@ -796,27 +817,36 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
|
||||
}
|
||||
if (curl_easy_perform(state->curl))
|
||||
goto out;
|
||||
if (curl_easy_getinfo(state->curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &d)) {
|
||||
/* CURL 7.55.0 deprecates CURLINFO_CONTENT_LENGTH_DOWNLOAD in favour of
|
||||
* the *_T version which returns a more sensible type for content length.
|
||||
*/
|
||||
#if LIBCURL_VERSION_NUM >= 0x073700
|
||||
if (curl_easy_getinfo(state->curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD_T, &cl)) {
|
||||
goto out;
|
||||
}
|
||||
#else
|
||||
if (curl_easy_getinfo(state->curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &cl)) {
|
||||
goto out;
|
||||
}
|
||||
#endif
|
||||
/* Prior CURL 7.19.4 return value of 0 could mean that the file size is not
|
||||
* know or the size is zero. From 7.19.4 CURL returns -1 if size is not
|
||||
* known and zero if it is really zero-length file. */
|
||||
#if LIBCURL_VERSION_NUM >= 0x071304
|
||||
if (d < 0) {
|
||||
if (cl < 0) {
|
||||
pstrcpy(state->errmsg, CURL_ERROR_SIZE,
|
||||
"Server didn't report file size.");
|
||||
goto out;
|
||||
}
|
||||
#else
|
||||
if (d <= 0) {
|
||||
if (cl <= 0) {
|
||||
pstrcpy(state->errmsg, CURL_ERROR_SIZE,
|
||||
"Unknown file size or zero-length file.");
|
||||
goto out;
|
||||
}
|
||||
#endif
|
||||
|
||||
s->len = d;
|
||||
s->len = cl;
|
||||
|
||||
if ((!strncasecmp(s->url, "http://", strlen("http://"))
|
||||
|| !strncasecmp(s->url, "https://", strlen("https://")))
|
||||
|
@@ -21,6 +21,7 @@
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
#define HW_POISON_H /* avoid poison since we patch against rules it "enforces" */
|
||||
#include "qemu/osdep.h"
|
||||
#include "qemu/error-report.h"
|
||||
#include "qapi/error.h"
|
||||
|
@@ -22,6 +22,7 @@
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#define HW_POISON_H /* avoid poison since we patch against rules it "enforces" */
|
||||
#include "qemu/osdep.h"
|
||||
#include "qapi/error.h"
|
||||
#include "qemu/module.h"
|
||||
@@ -198,6 +199,17 @@ static void mux_chr_accept_input(Chardev *chr)
|
||||
be->chr_read(be->opaque,
|
||||
&d->buffer[m][d->cons[m]++ & MUX_BUFFER_MASK], 1);
|
||||
}
|
||||
|
||||
#if defined(TARGET_S390X)
|
||||
/*
|
||||
* We're still not able to sync producer and consumer, so let's wait a bit
|
||||
* and try again by then.
|
||||
*/
|
||||
if (d->prod[m] != d->cons[m]) {
|
||||
qemu_mod_timer(d->accept_timer, qemu_get_clock_ns(vm_clock)
|
||||
+ (int64_t)100000);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static int mux_chr_can_read(void *opaque)
|
||||
@@ -332,6 +344,10 @@ static void qemu_chr_open_mux(Chardev *chr,
|
||||
}
|
||||
|
||||
d->focus = -1;
|
||||
#if defined(TARGET_S390X)
|
||||
d->accept_timer = qemu_new_timer_ns(vm_clock,
|
||||
(QEMUTimerCB *)mux_chr_accept_input, chr);
|
||||
#endif
|
||||
/* only default to opened state if we've realized the initial
|
||||
* set of muxes
|
||||
*/
|
||||
|
@@ -22,6 +22,7 @@
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#define HW_POISON_H /* avoid poison since we patch against rules it "enforces" */
|
||||
#include "qemu/osdep.h"
|
||||
#include "qemu/cutils.h"
|
||||
#include "monitor/monitor.h"
|
||||
|
@@ -37,6 +37,9 @@ struct MuxChardev {
|
||||
Chardev parent;
|
||||
CharBackend *backends[MAX_MUX];
|
||||
CharBackend chr;
|
||||
#if defined(TARGET_S390X)
|
||||
QEMUTimer *accept_timer;
|
||||
#endif
|
||||
int focus;
|
||||
int mux_cnt;
|
||||
int term_got_escape;
|
||||
|
@@ -18,6 +18,7 @@
|
||||
#CONFIG_QXL=n
|
||||
#CONFIG_SEV=n
|
||||
#CONFIG_SGA=n
|
||||
#CONFIG_TDX=n
|
||||
#CONFIG_TEST_DEVICES=n
|
||||
#CONFIG_TPM_CRB=n
|
||||
#CONFIG_TPM_TIS_ISA=n
|
||||
|
@@ -1,4 +1,4 @@
|
||||
executable('ivshmem-client', files('ivshmem-client.c', 'main.c'), genh,
|
||||
dependencies: glib,
|
||||
build_by_default: targetos == 'linux',
|
||||
install: false)
|
||||
install: true)
|
||||
|
@@ -1,4 +1,4 @@
|
||||
executable('ivshmem-server', files('ivshmem-server.c', 'main.c'), genh,
|
||||
dependencies: [qemuutil, rt],
|
||||
build_by_default: targetos == 'linux',
|
||||
install: false)
|
||||
install: true)
|
||||
|
5
disas.c
5
disas.c
@@ -347,8 +347,9 @@ physical_read_memory(bfd_vma memaddr, bfd_byte *myaddr, int length,
|
||||
CPUDebug *s = container_of(info, CPUDebug, info);
|
||||
MemTxResult res;
|
||||
|
||||
res = address_space_read(s->cpu->as, memaddr, MEMTXATTRS_UNSPECIFIED,
|
||||
myaddr, length);
|
||||
res = address_space_read_debug(s->cpu->as, memaddr,
|
||||
MEMTXATTRS_UNSPECIFIED_DEBUG,
|
||||
myaddr, length);
|
||||
return res == MEMTX_OK ? 0 : EIO;
|
||||
}
|
||||
|
||||
|
@@ -38,6 +38,7 @@ Supported mechanisms
|
||||
Currently supported confidential guest mechanisms are:
|
||||
|
||||
* AMD Secure Encrypted Virtualization (SEV) (see :doc:`i386/amd-memory-encryption`)
|
||||
* Intel Trust Domain Extension (TDX) (see :doc:`i386/tdx`)
|
||||
* POWER Protected Execution Facility (PEF) (see :ref:`power-papr-protected-execution-facility-pef`)
|
||||
* s390x Protected Virtualization (PV) (see :doc:`s390x/protvirt`)
|
||||
|
||||
|
112
docs/system/i386/tdx.rst
Normal file
112
docs/system/i386/tdx.rst
Normal file
@@ -0,0 +1,112 @@
|
||||
Intel Trusted Domain eXtension (TDX)
|
||||
====================================
|
||||
|
||||
Intel Trusted Domain eXtensions (TDX) refers to an Intel technology that extends
|
||||
Virtual Machine Extensions (VMX) and Multi-Key Total Memory Encryption (MKTME)
|
||||
with a new kind of virtual machine guest called a Trust Domain (TD). A TD runs
|
||||
in a CPU mode that is designed to protect the confidentiality of its memory
|
||||
contents and its CPU state from any other software, including the hosting
|
||||
Virtual Machine Monitor (VMM), unless explicitly shared by the TD itself.
|
||||
|
||||
Prerequisites
|
||||
-------------
|
||||
|
||||
To run TD, the physical machine needs to have TDX module loaded and initialized
|
||||
while KVM hypervisor has TDX support and has TDX enabled. If those requirements
|
||||
are met, the ``KVM_CAP_VM_TYPES`` will report the support of ``KVM_X86_TDX_VM``.
|
||||
|
||||
Trust Domain Virtual Firmware (TDVF)
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Trust Domain Virtual Firmware (TDVF) is required to provide TD services to boot
|
||||
TD Guest OS. TDVF needs to be copied to guest private memory and measured before
|
||||
a TD boots.
|
||||
|
||||
The VM scope ``MEMORY_ENCRYPT_OP`` ioctl provides command ``KVM_TDX_INIT_MEM_REGION``
|
||||
to copy the TDVF image to TD's private memory space.
|
||||
|
||||
Since TDX doesn't support readonly memslot, TDVF cannot be mapped as pflash
|
||||
device and it actually works as RAM. "-bios" option is chosen to load TDVF.
|
||||
|
||||
OVMF is the opensource firmware that implements the TDVF support. Thus the
|
||||
command line to specify and load TDVF is ``-bios OVMF.fd``
|
||||
|
||||
Restricted Memory memory-backend
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
TD's memory need to be back'ed by restricted memfd. Otherwise it cannot handle
|
||||
KVM_EXIT_MEMORY_FAULT exit from KVM.
|
||||
|
||||
Feature Control
|
||||
---------------
|
||||
|
||||
Unlike non-TDX VM, the CPU features (enumerated by CPU or MSR) of a TD is not
|
||||
under full control of VMM. VMM can only configure part of features of a TD on
|
||||
``KVM_TDX_INIT_VM`` command of VM scope ``MEMORY_ENCRYPT_OP`` ioctl.
|
||||
|
||||
The configurable features have three types:
|
||||
|
||||
- Attributes:
|
||||
- PKS (bit 30) controls whether Supervisor Protection Keys is exposed to TD,
|
||||
which determines related CPUID bit and CR4 bit;
|
||||
- PERFMON (bit 63) controls whether PMU is exposed to TD.
|
||||
|
||||
- XSAVE related features (XFAM):
|
||||
XFAM is a 64b mask, which has the same format as XCR0 or IA32_XSS MSR. It
|
||||
determines the set of extended features available for use by the guest TD.
|
||||
|
||||
- CPUID features:
|
||||
Only some bits of some CPUID leaves are directly configurable by VMM.
|
||||
|
||||
What features can be configured is reported via TDX capabilities.
|
||||
|
||||
TDX capabilities
|
||||
~~~~~~~~~~~~~~~~
|
||||
|
||||
The VM scope ``MEMORY_ENCRYPT_OP`` ioctl provides command ``KVM_TDX_CAPABILITIES``
|
||||
to get the TDX capabilities from KVM. It returns a data structure of
|
||||
``struct kvm_tdx_capabilites``, which tells the supported configuration of
|
||||
attributes, XFAM and CPUIDs.
|
||||
|
||||
Launching a TD (TDX VM)
|
||||
-----------------------
|
||||
|
||||
To launch a TDX guest:
|
||||
|
||||
.. parsed-literal::
|
||||
|
||||
|qemu_system_x86| \\
|
||||
-object memory-backend-memfd-private,id=ram1,size=${mem} \\
|
||||
-object tdx-guest,id=tdx0 \\
|
||||
-machine ...,kernel-irqchip=split,confidential-guest-support=tdx0,memory-backend=ram1 \\
|
||||
-bios OVMF.fd \\
|
||||
|
||||
Debugging
|
||||
---------
|
||||
|
||||
Bit 0 of TD attributes, is DEBUG bit, which decides if the TD runs in off-TD
|
||||
debug mode. When in off-TD debug mode, TD's VCPU state and private memory are
|
||||
accessible via given SEAMCALLs. This requires KVM to expose APIs to invoke those
|
||||
SEAMCALLs and resonponding QEMU change.
|
||||
|
||||
It's targeted as future work.
|
||||
|
||||
restrictions
|
||||
------------
|
||||
|
||||
- kernel-irqchip must be split;
|
||||
|
||||
- No readonly support for private memory;
|
||||
|
||||
- No SMM support: SMM support requires manipulating the guset register states
|
||||
which is not allowed;
|
||||
|
||||
Live Migration
|
||||
--------------
|
||||
|
||||
TODO
|
||||
|
||||
References
|
||||
----------
|
||||
|
||||
- `TDX Homepage <https://www.intel.com/content/www/us/en/developer/articles/technical/intel-trust-domain-extensions.html>`__
|
@@ -30,6 +30,7 @@ Architectural features
|
||||
i386/kvm-pv
|
||||
i386/sgx
|
||||
i386/amd-memory-encryption
|
||||
i386/tdx
|
||||
|
||||
.. _pcsys_005freq:
|
||||
|
||||
|
93
dump/dump.c
93
dump/dump.c
@@ -22,12 +22,14 @@
|
||||
#include "sysemu/runstate.h"
|
||||
#include "sysemu/cpus.h"
|
||||
#include "qapi/error.h"
|
||||
#include "sysemu/tdx.h"
|
||||
#include "qapi/qapi-commands-dump.h"
|
||||
#include "qapi/qapi-events-dump.h"
|
||||
#include "qapi/qmp/qerror.h"
|
||||
#include "qemu/error-report.h"
|
||||
#include "qemu/main-loop.h"
|
||||
#include "hw/misc/vmcoreinfo.h"
|
||||
#include "hw/boards.h"
|
||||
#include "migration/blocker.h"
|
||||
|
||||
#ifdef TARGET_X86_64
|
||||
@@ -550,6 +552,41 @@ static void write_memory(DumpState *s, GuestPhysBlock *block, ram_addr_t start,
|
||||
}
|
||||
}
|
||||
|
||||
static void write_memory_encrypted_guest(DumpState *s, GuestPhysBlock *block,
|
||||
ram_addr_t start, int64_t size,
|
||||
void *page_buf, uint64_t page_buf_size,
|
||||
Error **errp)
|
||||
{
|
||||
Error *local_err = NULL;
|
||||
hwaddr gpa = block->target_start + start;
|
||||
uint8_t *hva = block->host_addr + start;
|
||||
int64_t round_size;
|
||||
MemoryRegion *mr = block->mr;
|
||||
|
||||
if (!memory_region_ram_debug_ops_read_available(block->mr)) {
|
||||
memset(page_buf, 0, page_buf_size);
|
||||
return;
|
||||
}
|
||||
|
||||
while (size > 0) {
|
||||
round_size = size < page_buf_size ? size : page_buf_size;
|
||||
mr->ram_debug_ops->read(page_buf,
|
||||
hva, gpa,
|
||||
round_size,
|
||||
MEMTXATTRS_UNSPECIFIED_DEBUG);
|
||||
|
||||
write_data(s, page_buf, round_size, &local_err);
|
||||
if (local_err) {
|
||||
error_propagate(errp, local_err);
|
||||
return;
|
||||
}
|
||||
|
||||
size -= round_size;
|
||||
gpa += round_size;
|
||||
hva += round_size;
|
||||
}
|
||||
}
|
||||
|
||||
/* get the memory's offset and size in the vmcore */
|
||||
static void get_offset_range(hwaddr phys_addr,
|
||||
ram_addr_t mapping_length,
|
||||
@@ -753,6 +790,15 @@ static void dump_iterate(DumpState *s, Error **errp)
|
||||
ERRP_GUARD();
|
||||
GuestPhysBlock *block;
|
||||
int64_t memblock_size, memblock_start;
|
||||
void *page_buf = NULL;
|
||||
|
||||
if (s->encrypted_guest) {
|
||||
page_buf = g_malloc(s->dump_info.page_size);
|
||||
if (!page_buf) {
|
||||
error_setg(errp, "No enough memory.");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
|
||||
memblock_start = dump_filtered_memblock_start(block, s->filter_area_begin, s->filter_area_length);
|
||||
@@ -761,13 +807,21 @@ static void dump_iterate(DumpState *s, Error **errp)
|
||||
}
|
||||
|
||||
memblock_size = dump_filtered_memblock_size(block, s->filter_area_begin, s->filter_area_length);
|
||||
if (!s->encrypted_guest) {
|
||||
write_memory(s, block, memblock_start, memblock_size, errp);
|
||||
} else {
|
||||
write_memory_encrypted_guest(s, block, memblock_start, memblock_size,
|
||||
page_buf, s->dump_info.page_size,
|
||||
errp);
|
||||
}
|
||||
|
||||
/* Write the memory to file */
|
||||
write_memory(s, block, memblock_start, memblock_size, errp);
|
||||
if (*errp) {
|
||||
return;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (page_buf)
|
||||
g_free(page_buf);
|
||||
}
|
||||
|
||||
static void dump_end(DumpState *s, Error **errp)
|
||||
@@ -1252,6 +1306,17 @@ static uint64_t dump_pfn_to_paddr(DumpState *s, uint64_t pfn)
|
||||
return (pfn + ARCH_PFN_OFFSET) << target_page_shift;
|
||||
}
|
||||
|
||||
static void dump_encrypted_guest_memory(MemoryRegion *mr, uint8_t *buf,
|
||||
const uint8_t* hva, hwaddr gpa,
|
||||
size_t size)
|
||||
{
|
||||
if (memory_region_ram_debug_ops_read_available(mr)) {
|
||||
mr->ram_debug_ops->read(buf, hva, gpa, size,
|
||||
MEMTXATTRS_UNSPECIFIED_DEBUG);
|
||||
} else {
|
||||
memset(buf, 0, size);
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Return the page frame number and the page content in *bufptr. bufptr can be
|
||||
* NULL. If not NULL, *bufptr must contains a target page size of pre-allocated
|
||||
@@ -1280,12 +1345,18 @@ static bool get_next_page(GuestPhysBlock **blockptr, uint64_t *pfnptr,
|
||||
while (1) {
|
||||
if (addr >= block->target_start && addr < block->target_end) {
|
||||
size_t n = MIN(block->target_end - addr, page_size - addr % page_size);
|
||||
MemoryRegion *mr = block->mr;
|
||||
|
||||
hbuf = block->host_addr + (addr - block->target_start);
|
||||
if (!buf) {
|
||||
if (n == page_size) {
|
||||
/* this is a whole target page, go for it */
|
||||
assert(addr % page_size == 0);
|
||||
|
||||
buf = hbuf;
|
||||
if (s->encrypted_guest && bufptr && *bufptr) {
|
||||
dump_encrypted_guest_memory(mr, *bufptr, hbuf, addr, page_size);
|
||||
}
|
||||
break;
|
||||
} else if (bufptr) {
|
||||
assert(*bufptr);
|
||||
@@ -1296,7 +1367,13 @@ static bool get_next_page(GuestPhysBlock **blockptr, uint64_t *pfnptr,
|
||||
}
|
||||
}
|
||||
|
||||
memcpy(buf + addr % page_size, hbuf, n);
|
||||
if (s->encrypted_guest && bufptr && *bufptr) {
|
||||
dump_encrypted_guest_memory(mr, *bufptr + addr % page_size,
|
||||
hbuf, addr, n);
|
||||
} else {
|
||||
memcpy(buf + addr % page_size, hbuf, n);
|
||||
}
|
||||
|
||||
addr += n;
|
||||
if (addr % page_size == 0) {
|
||||
/* we filled up the page */
|
||||
@@ -1323,7 +1400,7 @@ static bool get_next_page(GuestPhysBlock **blockptr, uint64_t *pfnptr,
|
||||
}
|
||||
}
|
||||
|
||||
if (bufptr) {
|
||||
if (!s->encrypted_guest && bufptr) {
|
||||
*bufptr = buf;
|
||||
}
|
||||
|
||||
@@ -1711,6 +1788,10 @@ static void dump_state_prepare(DumpState *s)
|
||||
{
|
||||
/* zero the struct, setting status to active */
|
||||
*s = (DumpState) { .status = DUMP_STATUS_ACTIVE };
|
||||
|
||||
if (tdx_debug_enabled()) {
|
||||
s->encrypted_guest = true;
|
||||
}
|
||||
}
|
||||
|
||||
bool qemu_system_dump_in_progress(void)
|
||||
@@ -1895,7 +1976,7 @@ static void dump_init(DumpState *s, int fd, bool has_format,
|
||||
warn_report("guest note format is unsupported: %" PRIu16, format);
|
||||
} else {
|
||||
s->guest_note = g_malloc(size + 1); /* +1 for adding \0 */
|
||||
cpu_physical_memory_read(addr, s->guest_note, size);
|
||||
cpu_physical_memory_read_debug(addr, s->guest_note, size);
|
||||
|
||||
get_note_sizes(s, s->guest_note, NULL, &name_size, &desc_size);
|
||||
s->guest_note_size = ELF_NOTE_SIZE(note_head_size, name_size,
|
||||
|
@@ -26,6 +26,7 @@
|
||||
#include "qemu/xattr.h"
|
||||
#include "9p-iov-marshal.h"
|
||||
#include "hw/9pfs/9p-proxy.h"
|
||||
#include "hw/9pfs/9p-util.h"
|
||||
#include "fsdev/9p-iov-marshal.h"
|
||||
|
||||
#define PROGNAME "virtfs-proxy-helper"
|
||||
@@ -338,6 +339,28 @@ static void resetugid(int suid, int sgid)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Open regular file or directory. Attempts to open any special file are
|
||||
* rejected.
|
||||
*
|
||||
* returns file descriptor or -1 on error
|
||||
*/
|
||||
static int open_regular(const char *pathname, int flags, mode_t mode)
|
||||
{
|
||||
int fd;
|
||||
|
||||
fd = open(pathname, flags, mode);
|
||||
if (fd < 0) {
|
||||
return fd;
|
||||
}
|
||||
|
||||
if (close_if_special_file(fd) < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return fd;
|
||||
}
|
||||
|
||||
/*
|
||||
* send response in two parts
|
||||
* 1) ProxyHeader
|
||||
@@ -682,7 +705,7 @@ static int do_create(struct iovec *iovec)
|
||||
if (ret < 0) {
|
||||
goto unmarshal_err_out;
|
||||
}
|
||||
ret = open(path.data, flags, mode);
|
||||
ret = open_regular(path.data, flags, mode);
|
||||
if (ret < 0) {
|
||||
ret = -errno;
|
||||
}
|
||||
@@ -707,7 +730,7 @@ static int do_open(struct iovec *iovec)
|
||||
if (ret < 0) {
|
||||
goto err_out;
|
||||
}
|
||||
ret = open(path.data, flags);
|
||||
ret = open_regular(path.data, flags, 0);
|
||||
if (ret < 0) {
|
||||
ret = -errno;
|
||||
}
|
||||
|
@@ -71,9 +71,9 @@ static inline int target_memory_rw_debug(CPUState *cpu, target_ulong addr,
|
||||
#ifndef CONFIG_USER_ONLY
|
||||
if (phy_memory_mode) {
|
||||
if (is_write) {
|
||||
cpu_physical_memory_write(addr, buf, len);
|
||||
cpu_physical_memory_write_debug(addr, buf, len);
|
||||
} else {
|
||||
cpu_physical_memory_read(addr, buf, len);
|
||||
cpu_physical_memory_read_debug(addr, buf, len);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@@ -13,6 +13,8 @@
|
||||
#ifndef QEMU_9P_UTIL_H
|
||||
#define QEMU_9P_UTIL_H
|
||||
|
||||
#include "qemu/error-report.h"
|
||||
|
||||
#ifdef O_PATH
|
||||
#define O_PATH_9P_UTIL O_PATH
|
||||
#else
|
||||
@@ -112,6 +114,38 @@ static inline void close_preserve_errno(int fd)
|
||||
errno = serrno;
|
||||
}
|
||||
|
||||
/**
|
||||
* close_if_special_file() - Close @fd if neither regular file nor directory.
|
||||
*
|
||||
* @fd: file descriptor of open file
|
||||
* Return: 0 on regular file or directory, -1 otherwise
|
||||
*
|
||||
* CVE-2023-2861: Prohibit opening any special file directly on host
|
||||
* (especially device files), as a compromised client could potentially gain
|
||||
* access outside exported tree under certain, unsafe setups. We expect
|
||||
* client to handle I/O on special files exclusively on guest side.
|
||||
*/
|
||||
static inline int close_if_special_file(int fd)
|
||||
{
|
||||
struct stat stbuf;
|
||||
|
||||
if (fstat(fd, &stbuf) < 0) {
|
||||
close_preserve_errno(fd);
|
||||
return -1;
|
||||
}
|
||||
if (!S_ISREG(stbuf.st_mode) && !S_ISDIR(stbuf.st_mode)) {
|
||||
error_report_once(
|
||||
"9p: broken or compromised client detected; attempt to open "
|
||||
"special file (i.e. neither regular file, nor directory)"
|
||||
);
|
||||
close(fd);
|
||||
errno = ENXIO;
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int openat_dir(int dirfd, const char *name)
|
||||
{
|
||||
return openat(dirfd, name,
|
||||
@@ -146,6 +180,10 @@ again:
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (close_if_special_file(fd) < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
serrno = errno;
|
||||
/* O_NONBLOCK was only needed to open the file. Let's drop it. We don't
|
||||
* do that with O_PATH since fcntl(F_SETFL) isn't supported, and openat()
|
||||
|
@@ -722,6 +722,8 @@ static XenBlockDrive *xen_block_drive_create(const char *id,
|
||||
const char *mode = qdict_get_try_str(opts, "mode");
|
||||
const char *direct_io_safe = qdict_get_try_str(opts, "direct-io-safe");
|
||||
const char *discard_enable = qdict_get_try_str(opts, "discard-enable");
|
||||
const char *suse_diskcache_disable_flush = qdict_get_try_str(opts,
|
||||
"suse-diskcache-disable-flush");
|
||||
char *driver = NULL;
|
||||
char *filename = NULL;
|
||||
XenBlockDrive *drive = NULL;
|
||||
@@ -802,6 +804,16 @@ static XenBlockDrive *xen_block_drive_create(const char *id,
|
||||
}
|
||||
}
|
||||
|
||||
if (suse_diskcache_disable_flush) {
|
||||
unsigned long value;
|
||||
if (!qemu_strtoul(suse_diskcache_disable_flush, NULL, 2, &value) && !!value) {
|
||||
QDict *cache_qdict = qdict_new();
|
||||
|
||||
qdict_put_bool(cache_qdict, "no-flush", true);
|
||||
qdict_put_obj(file_layer, "cache", QOBJECT(cache_qdict));
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* It is necessary to turn file locking off as an emulated device
|
||||
* may have already opened the same image file.
|
||||
|
@@ -97,7 +97,7 @@ static int find_memory_backend_type(Object *obj, void *opaque)
|
||||
|
||||
if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
|
||||
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
|
||||
RAMBlock *rb = backend->mr.ram_block;
|
||||
RAMBlock *rb = backend->mr->ram_block;
|
||||
|
||||
if (rb && rb->fd > 0) {
|
||||
ret = fcntl(rb->fd, F_GET_SEALS);
|
||||
|
@@ -10,6 +10,11 @@ config SGX
|
||||
bool
|
||||
depends on KVM
|
||||
|
||||
config TDX
|
||||
bool
|
||||
select X86_FW_OVMF
|
||||
depends on KVM
|
||||
|
||||
config PC
|
||||
bool
|
||||
imply APPLESMC
|
||||
@@ -26,6 +31,7 @@ config PC
|
||||
imply QXL
|
||||
imply SEV
|
||||
imply SGX
|
||||
imply TDX
|
||||
imply SGA
|
||||
imply TEST_DEVICES
|
||||
imply TPM_CRB
|
||||
|
@@ -871,7 +871,8 @@ static void build_dbg_aml(Aml *table)
|
||||
aml_append(table, scope);
|
||||
}
|
||||
|
||||
static Aml *build_link_dev(const char *name, uint8_t uid, Aml *reg)
|
||||
static Aml *build_link_dev(const char *name, uint8_t uid, Aml *reg,
|
||||
bool level_trigger_unsupported)
|
||||
{
|
||||
Aml *dev;
|
||||
Aml *crs;
|
||||
@@ -883,7 +884,10 @@ static Aml *build_link_dev(const char *name, uint8_t uid, Aml *reg)
|
||||
aml_append(dev, aml_name_decl("_UID", aml_int(uid)));
|
||||
|
||||
crs = aml_resource_template();
|
||||
aml_append(crs, aml_interrupt(AML_CONSUMER, AML_LEVEL, AML_ACTIVE_HIGH,
|
||||
aml_append(crs, aml_interrupt(AML_CONSUMER,
|
||||
level_trigger_unsupported ?
|
||||
AML_EDGE : AML_LEVEL,
|
||||
AML_ACTIVE_HIGH,
|
||||
AML_SHARED, irqs, ARRAY_SIZE(irqs)));
|
||||
aml_append(dev, aml_name_decl("_PRS", crs));
|
||||
|
||||
@@ -907,7 +911,8 @@ static Aml *build_link_dev(const char *name, uint8_t uid, Aml *reg)
|
||||
return dev;
|
||||
}
|
||||
|
||||
static Aml *build_gsi_link_dev(const char *name, uint8_t uid, uint8_t gsi)
|
||||
static Aml *build_gsi_link_dev(const char *name, uint8_t uid,
|
||||
uint8_t gsi, bool level_trigger_unsupported)
|
||||
{
|
||||
Aml *dev;
|
||||
Aml *crs;
|
||||
@@ -920,7 +925,10 @@ static Aml *build_gsi_link_dev(const char *name, uint8_t uid, uint8_t gsi)
|
||||
|
||||
crs = aml_resource_template();
|
||||
irqs = gsi;
|
||||
aml_append(crs, aml_interrupt(AML_CONSUMER, AML_LEVEL, AML_ACTIVE_HIGH,
|
||||
aml_append(crs, aml_interrupt(AML_CONSUMER,
|
||||
level_trigger_unsupported ?
|
||||
AML_EDGE : AML_LEVEL,
|
||||
AML_ACTIVE_HIGH,
|
||||
AML_SHARED, &irqs, 1));
|
||||
aml_append(dev, aml_name_decl("_PRS", crs));
|
||||
|
||||
@@ -939,7 +947,7 @@ static Aml *build_gsi_link_dev(const char *name, uint8_t uid, uint8_t gsi)
|
||||
}
|
||||
|
||||
/* _CRS method - get current settings */
|
||||
static Aml *build_iqcr_method(bool is_piix4)
|
||||
static Aml *build_iqcr_method(bool is_piix4, bool level_trigger_unsupported)
|
||||
{
|
||||
Aml *if_ctx;
|
||||
uint32_t irqs;
|
||||
@@ -947,7 +955,9 @@ static Aml *build_iqcr_method(bool is_piix4)
|
||||
Aml *crs = aml_resource_template();
|
||||
|
||||
irqs = 0;
|
||||
aml_append(crs, aml_interrupt(AML_CONSUMER, AML_LEVEL,
|
||||
aml_append(crs, aml_interrupt(AML_CONSUMER,
|
||||
level_trigger_unsupported ?
|
||||
AML_EDGE : AML_LEVEL,
|
||||
AML_ACTIVE_HIGH, AML_SHARED, &irqs, 1));
|
||||
aml_append(method, aml_name_decl("PRR0", crs));
|
||||
|
||||
@@ -981,7 +991,7 @@ static Aml *build_irq_status_method(void)
|
||||
return method;
|
||||
}
|
||||
|
||||
static void build_piix4_pci0_int(Aml *table)
|
||||
static void build_piix4_pci0_int(Aml *table, bool level_trigger_unsupported)
|
||||
{
|
||||
Aml *dev;
|
||||
Aml *crs;
|
||||
@@ -994,12 +1004,16 @@ static void build_piix4_pci0_int(Aml *table)
|
||||
aml_append(sb_scope, pci0_scope);
|
||||
|
||||
aml_append(sb_scope, build_irq_status_method());
|
||||
aml_append(sb_scope, build_iqcr_method(true));
|
||||
aml_append(sb_scope, build_iqcr_method(true, level_trigger_unsupported));
|
||||
|
||||
aml_append(sb_scope, build_link_dev("LNKA", 0, aml_name("PRQ0")));
|
||||
aml_append(sb_scope, build_link_dev("LNKB", 1, aml_name("PRQ1")));
|
||||
aml_append(sb_scope, build_link_dev("LNKC", 2, aml_name("PRQ2")));
|
||||
aml_append(sb_scope, build_link_dev("LNKD", 3, aml_name("PRQ3")));
|
||||
aml_append(sb_scope, build_link_dev("LNKA", 0, aml_name("PRQ0"),
|
||||
level_trigger_unsupported));
|
||||
aml_append(sb_scope, build_link_dev("LNKB", 1, aml_name("PRQ1"),
|
||||
level_trigger_unsupported));
|
||||
aml_append(sb_scope, build_link_dev("LNKC", 2, aml_name("PRQ2"),
|
||||
level_trigger_unsupported));
|
||||
aml_append(sb_scope, build_link_dev("LNKD", 3, aml_name("PRQ3"),
|
||||
level_trigger_unsupported));
|
||||
|
||||
dev = aml_device("LNKS");
|
||||
{
|
||||
@@ -1008,7 +1022,9 @@ static void build_piix4_pci0_int(Aml *table)
|
||||
|
||||
crs = aml_resource_template();
|
||||
irqs = 9;
|
||||
aml_append(crs, aml_interrupt(AML_CONSUMER, AML_LEVEL,
|
||||
aml_append(crs, aml_interrupt(AML_CONSUMER,
|
||||
level_trigger_unsupported ?
|
||||
AML_EDGE : AML_LEVEL,
|
||||
AML_ACTIVE_HIGH, AML_SHARED,
|
||||
&irqs, 1));
|
||||
aml_append(dev, aml_name_decl("_PRS", crs));
|
||||
@@ -1094,7 +1110,7 @@ static Aml *build_q35_routing_table(const char *str)
|
||||
return pkg;
|
||||
}
|
||||
|
||||
static void build_q35_pci0_int(Aml *table)
|
||||
static void build_q35_pci0_int(Aml *table, bool level_trigger_unsupported)
|
||||
{
|
||||
Aml *method;
|
||||
Aml *sb_scope = aml_scope("_SB");
|
||||
@@ -1133,25 +1149,41 @@ static void build_q35_pci0_int(Aml *table)
|
||||
aml_append(sb_scope, pci0_scope);
|
||||
|
||||
aml_append(sb_scope, build_irq_status_method());
|
||||
aml_append(sb_scope, build_iqcr_method(false));
|
||||
aml_append(sb_scope, build_iqcr_method(false, level_trigger_unsupported));
|
||||
|
||||
aml_append(sb_scope, build_link_dev("LNKA", 0, aml_name("PRQA")));
|
||||
aml_append(sb_scope, build_link_dev("LNKB", 1, aml_name("PRQB")));
|
||||
aml_append(sb_scope, build_link_dev("LNKC", 2, aml_name("PRQC")));
|
||||
aml_append(sb_scope, build_link_dev("LNKD", 3, aml_name("PRQD")));
|
||||
aml_append(sb_scope, build_link_dev("LNKE", 4, aml_name("PRQE")));
|
||||
aml_append(sb_scope, build_link_dev("LNKF", 5, aml_name("PRQF")));
|
||||
aml_append(sb_scope, build_link_dev("LNKG", 6, aml_name("PRQG")));
|
||||
aml_append(sb_scope, build_link_dev("LNKH", 7, aml_name("PRQH")));
|
||||
aml_append(sb_scope, build_link_dev("LNKA", 0, aml_name("PRQA"),
|
||||
level_trigger_unsupported));
|
||||
aml_append(sb_scope, build_link_dev("LNKB", 1, aml_name("PRQB"),
|
||||
level_trigger_unsupported));
|
||||
aml_append(sb_scope, build_link_dev("LNKC", 2, aml_name("PRQC"),
|
||||
level_trigger_unsupported));
|
||||
aml_append(sb_scope, build_link_dev("LNKD", 3, aml_name("PRQD"),
|
||||
level_trigger_unsupported));
|
||||
aml_append(sb_scope, build_link_dev("LNKE", 4, aml_name("PRQE"),
|
||||
level_trigger_unsupported));
|
||||
aml_append(sb_scope, build_link_dev("LNKF", 5, aml_name("PRQF"),
|
||||
level_trigger_unsupported));
|
||||
aml_append(sb_scope, build_link_dev("LNKG", 6, aml_name("PRQG"),
|
||||
level_trigger_unsupported));
|
||||
aml_append(sb_scope, build_link_dev("LNKH", 7, aml_name("PRQH"),
|
||||
level_trigger_unsupported));
|
||||
|
||||
aml_append(sb_scope, build_gsi_link_dev("GSIA", 0x10, 0x10));
|
||||
aml_append(sb_scope, build_gsi_link_dev("GSIB", 0x11, 0x11));
|
||||
aml_append(sb_scope, build_gsi_link_dev("GSIC", 0x12, 0x12));
|
||||
aml_append(sb_scope, build_gsi_link_dev("GSID", 0x13, 0x13));
|
||||
aml_append(sb_scope, build_gsi_link_dev("GSIE", 0x14, 0x14));
|
||||
aml_append(sb_scope, build_gsi_link_dev("GSIF", 0x15, 0x15));
|
||||
aml_append(sb_scope, build_gsi_link_dev("GSIG", 0x16, 0x16));
|
||||
aml_append(sb_scope, build_gsi_link_dev("GSIH", 0x17, 0x17));
|
||||
aml_append(sb_scope, build_gsi_link_dev("GSIA", 0x10, 0x10,
|
||||
level_trigger_unsupported));
|
||||
aml_append(sb_scope, build_gsi_link_dev("GSIB", 0x11, 0x11,
|
||||
level_trigger_unsupported));
|
||||
aml_append(sb_scope, build_gsi_link_dev("GSIC", 0x12, 0x12,
|
||||
level_trigger_unsupported));
|
||||
aml_append(sb_scope, build_gsi_link_dev("GSID", 0x13, 0x13,
|
||||
level_trigger_unsupported));
|
||||
aml_append(sb_scope, build_gsi_link_dev("GSIE", 0x14, 0x14,
|
||||
level_trigger_unsupported));
|
||||
aml_append(sb_scope, build_gsi_link_dev("GSIF", 0x15, 0x15,
|
||||
level_trigger_unsupported));
|
||||
aml_append(sb_scope, build_gsi_link_dev("GSIG", 0x16, 0x16,
|
||||
level_trigger_unsupported));
|
||||
aml_append(sb_scope, build_gsi_link_dev("GSIH", 0x17, 0x17,
|
||||
level_trigger_unsupported));
|
||||
|
||||
aml_append(table, sb_scope);
|
||||
}
|
||||
@@ -1331,6 +1363,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
|
||||
PCMachineState *pcms = PC_MACHINE(machine);
|
||||
PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(machine);
|
||||
X86MachineState *x86ms = X86_MACHINE(machine);
|
||||
bool level_trigger_unsupported = x86ms->eoi_intercept_unsupported;
|
||||
AcpiMcfgInfo mcfg;
|
||||
bool mcfg_valid = !!acpi_get_mcfg(&mcfg);
|
||||
uint32_t nr_mem = machine->ram_slots;
|
||||
@@ -1363,7 +1396,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
|
||||
if (pm->pcihp_bridge_en || pm->pcihp_root_en) {
|
||||
build_x86_acpi_pci_hotplug(dsdt, pm->pcihp_io_base);
|
||||
}
|
||||
build_piix4_pci0_int(dsdt);
|
||||
build_piix4_pci0_int(dsdt, level_trigger_unsupported);
|
||||
} else if (q35) {
|
||||
sb_scope = aml_scope("_SB");
|
||||
dev = aml_device("PCI0");
|
||||
@@ -1407,7 +1440,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
|
||||
if (pm->pcihp_bridge_en) {
|
||||
build_x86_acpi_pci_hotplug(dsdt, pm->pcihp_io_base);
|
||||
}
|
||||
build_q35_pci0_int(dsdt);
|
||||
build_q35_pci0_int(dsdt, level_trigger_unsupported);
|
||||
}
|
||||
|
||||
if (misc->has_hpet) {
|
||||
|
@@ -105,6 +105,7 @@ void acpi_build_madt(GArray *table_data, BIOSLinker *linker,
|
||||
AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(adev);
|
||||
AcpiTable table = { .sig = "APIC", .rev = 1, .oem_id = oem_id,
|
||||
.oem_table_id = oem_table_id };
|
||||
bool level_trigger_unsupported = x86ms->eoi_intercept_unsupported;
|
||||
|
||||
acpi_table_begin(&table, table_data);
|
||||
/* Local APIC Address */
|
||||
@@ -124,18 +125,43 @@ void acpi_build_madt(GArray *table_data, BIOSLinker *linker,
|
||||
IO_APIC_SECONDARY_ADDRESS, IO_APIC_SECONDARY_IRQBASE);
|
||||
}
|
||||
|
||||
if (x86ms->apic_xrupt_override) {
|
||||
build_xrupt_override(table_data, 0, 2,
|
||||
0 /* Flags: Conforms to the specifications of the bus */);
|
||||
}
|
||||
|
||||
for (i = 1; i < 16; i++) {
|
||||
if (!(x86ms->pci_irq_mask & (1 << i))) {
|
||||
/* No need for a INT source override structure. */
|
||||
continue;
|
||||
if (level_trigger_unsupported) {
|
||||
/* Force edge trigger */
|
||||
if (x86ms->apic_xrupt_override) {
|
||||
build_xrupt_override(table_data, 0, 2,
|
||||
/* Flags: active high, edge triggered */
|
||||
1 | (1 << 2));
|
||||
}
|
||||
|
||||
for (i = x86ms->apic_xrupt_override ? 1 : 0; i < 16; i++) {
|
||||
build_xrupt_override(table_data, i, i,
|
||||
/* Flags: active high, edge triggered */
|
||||
1 | (1 << 2));
|
||||
}
|
||||
|
||||
if (x86ms->ioapic2) {
|
||||
for (i = 0; i < 16; i++) {
|
||||
build_xrupt_override(table_data, IO_APIC_SECONDARY_IRQBASE + i,
|
||||
IO_APIC_SECONDARY_IRQBASE + i,
|
||||
/* Flags: active high, edge triggered */
|
||||
1 | (1 << 2));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (x86ms->apic_xrupt_override) {
|
||||
build_xrupt_override(table_data, 0, 2,
|
||||
0 /* Flags: Conforms to the specifications of the bus */);
|
||||
}
|
||||
|
||||
for (i = 1; i < 16; i++) {
|
||||
if (!(x86ms->pci_irq_mask & (1 << i))) {
|
||||
/* No need for a INT source override structure. */
|
||||
continue;
|
||||
}
|
||||
build_xrupt_override(table_data, i, i,
|
||||
0xd /* Flags: Active high, Level Triggered */);
|
||||
|
||||
}
|
||||
build_xrupt_override(table_data, i, i,
|
||||
0xd /* Flags: Active high, Level Triggered */);
|
||||
}
|
||||
|
||||
if (x2apic_mode) {
|
||||
|
@@ -19,9 +19,11 @@
|
||||
#include "sysemu/kvm.h"
|
||||
#include "sysemu/runstate.h"
|
||||
#include "sysemu/hw_accel.h"
|
||||
#include "exec/confidential-guest-support.h"
|
||||
#include "kvm/kvm_i386.h"
|
||||
#include "migration/vmstate.h"
|
||||
#include "hw/sysbus.h"
|
||||
#include "hw/boards.h"
|
||||
#include "hw/kvm/clock.h"
|
||||
#include "hw/qdev-properties.h"
|
||||
#include "qapi/error.h"
|
||||
@@ -332,8 +334,15 @@ void kvmclock_create(bool create_always)
|
||||
{
|
||||
X86CPU *cpu = X86_CPU(first_cpu);
|
||||
|
||||
if (!kvm_enabled() || !kvm_has_adjust_clock())
|
||||
return;
|
||||
MachineState *ms = MACHINE(qdev_get_machine());
|
||||
ConfidentialGuestSupport *cgs = ms->cgs;
|
||||
|
||||
if (!kvm_enabled() || !kvm_has_adjust_clock() ||
|
||||
(cgs &&
|
||||
object_property_get_bool(OBJECT(cgs),
|
||||
CONFIDENTIAL_GUEST_SUPPORT_DISABLE_PV_CLOCK,
|
||||
NULL)))
|
||||
return;
|
||||
|
||||
if (create_always ||
|
||||
cpu->env.features[FEAT_KVM] & ((1ULL << KVM_FEATURE_CLOCKSOURCE) |
|
||||
|
@@ -28,6 +28,7 @@ i386_ss.add(when: 'CONFIG_PC', if_true: files(
|
||||
'port92.c'))
|
||||
i386_ss.add(when: 'CONFIG_X86_FW_OVMF', if_true: files('pc_sysfw_ovmf.c'),
|
||||
if_false: files('pc_sysfw_ovmf-stubs.c'))
|
||||
i386_ss.add(when: 'CONFIG_TDX', if_true: files('tdvf.c', 'tdvf-hob.c'))
|
||||
|
||||
subdir('kvm')
|
||||
subdir('xen')
|
||||
|
@@ -330,7 +330,7 @@ static void microvm_memory_init(MicrovmMachineState *mms)
|
||||
rom_set_fw(fw_cfg);
|
||||
|
||||
if (machine->kernel_filename != NULL) {
|
||||
x86_load_linux(x86ms, fw_cfg, 0, true, false);
|
||||
x86_load_linux(x86ms, fw_cfg, 0, true);
|
||||
}
|
||||
|
||||
if (mms->option_roms) {
|
||||
|
25
hw/i386/pc.c
25
hw/i386/pc.c
@@ -61,6 +61,7 @@
|
||||
#include "sysemu/reset.h"
|
||||
#include "sysemu/runstate.h"
|
||||
#include "kvm/kvm_i386.h"
|
||||
#include "kvm/tdx.h"
|
||||
#include "hw/xen/xen.h"
|
||||
#include "hw/xen/start_info.h"
|
||||
#include "ui/qemu-spice.h"
|
||||
@@ -799,7 +800,7 @@ void xen_load_linux(PCMachineState *pcms)
|
||||
rom_set_fw(fw_cfg);
|
||||
|
||||
x86_load_linux(x86ms, fw_cfg, pcmc->acpi_data_size,
|
||||
pcmc->pvh_enabled, pcmc->legacy_no_rng_seed);
|
||||
pcmc->pvh_enabled);
|
||||
for (i = 0; i < nb_option_roms; i++) {
|
||||
assert(!strcmp(option_rom[i].name, "linuxboot.bin") ||
|
||||
!strcmp(option_rom[i].name, "linuxboot_dma.bin") ||
|
||||
@@ -1085,16 +1086,18 @@ void pc_memory_init(PCMachineState *pcms,
|
||||
/* Initialize PC system firmware */
|
||||
pc_system_firmware_init(pcms, rom_memory);
|
||||
|
||||
option_rom_mr = g_malloc(sizeof(*option_rom_mr));
|
||||
memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
|
||||
&error_fatal);
|
||||
if (pcmc->pci_enabled) {
|
||||
memory_region_set_readonly(option_rom_mr, true);
|
||||
if (!is_tdx_vm()) {
|
||||
option_rom_mr = g_malloc(sizeof(*option_rom_mr));
|
||||
memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
|
||||
&error_fatal);
|
||||
if (pcmc->pci_enabled) {
|
||||
memory_region_set_readonly(option_rom_mr, true);
|
||||
}
|
||||
memory_region_add_subregion_overlap(rom_memory,
|
||||
PC_ROM_MIN_VGA,
|
||||
option_rom_mr,
|
||||
1);
|
||||
}
|
||||
memory_region_add_subregion_overlap(rom_memory,
|
||||
PC_ROM_MIN_VGA,
|
||||
option_rom_mr,
|
||||
1);
|
||||
|
||||
fw_cfg = fw_cfg_arch_create(machine,
|
||||
x86ms->boot_cpus, x86ms->apic_id_limit);
|
||||
@@ -1119,7 +1122,7 @@ void pc_memory_init(PCMachineState *pcms,
|
||||
|
||||
if (linux_boot) {
|
||||
x86_load_linux(x86ms, fw_cfg, pcmc->acpi_data_size,
|
||||
pcmc->pvh_enabled, pcmc->legacy_no_rng_seed);
|
||||
pcmc->pvh_enabled);
|
||||
}
|
||||
|
||||
for (i = 0; i < nb_option_roms; i++) {
|
||||
|
@@ -426,6 +426,7 @@ static void pc_i440fx_machine_options(MachineClass *m)
|
||||
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
|
||||
pcmc->default_nic_model = "e1000";
|
||||
pcmc->pci_root_uid = 0;
|
||||
pcmc->default_cpu_version = 1;
|
||||
|
||||
m->family = "pc_piix";
|
||||
m->desc = "Standard PC (i440FX + PIIX, 1996)";
|
||||
@@ -437,11 +438,9 @@ static void pc_i440fx_machine_options(MachineClass *m)
|
||||
|
||||
static void pc_i440fx_7_2_machine_options(MachineClass *m)
|
||||
{
|
||||
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
|
||||
pc_i440fx_machine_options(m);
|
||||
m->alias = "pc";
|
||||
m->is_default = true;
|
||||
pcmc->default_cpu_version = 1;
|
||||
}
|
||||
|
||||
DEFINE_I440FX_MACHINE(v7_2, "pc-i440fx-7.2", NULL,
|
||||
@@ -449,11 +448,9 @@ DEFINE_I440FX_MACHINE(v7_2, "pc-i440fx-7.2", NULL,
|
||||
|
||||
static void pc_i440fx_7_1_machine_options(MachineClass *m)
|
||||
{
|
||||
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
|
||||
pc_i440fx_7_2_machine_options(m);
|
||||
m->alias = NULL;
|
||||
m->is_default = false;
|
||||
pcmc->legacy_no_rng_seed = true;
|
||||
compat_props_add(m->compat_props, hw_compat_7_1, hw_compat_7_1_len);
|
||||
compat_props_add(m->compat_props, pc_compat_7_1, pc_compat_7_1_len);
|
||||
}
|
||||
@@ -465,8 +462,6 @@ static void pc_i440fx_7_0_machine_options(MachineClass *m)
|
||||
{
|
||||
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
|
||||
pc_i440fx_7_1_machine_options(m);
|
||||
m->alias = NULL;
|
||||
m->is_default = false;
|
||||
pcmc->enforce_amd_1tb_hole = false;
|
||||
compat_props_add(m->compat_props, hw_compat_7_0, hw_compat_7_0_len);
|
||||
compat_props_add(m->compat_props, pc_compat_7_0, pc_compat_7_0_len);
|
||||
@@ -478,8 +473,6 @@ DEFINE_I440FX_MACHINE(v7_0, "pc-i440fx-7.0", NULL,
|
||||
static void pc_i440fx_6_2_machine_options(MachineClass *m)
|
||||
{
|
||||
pc_i440fx_7_0_machine_options(m);
|
||||
m->alias = NULL;
|
||||
m->is_default = false;
|
||||
compat_props_add(m->compat_props, hw_compat_6_2, hw_compat_6_2_len);
|
||||
compat_props_add(m->compat_props, pc_compat_6_2, pc_compat_6_2_len);
|
||||
}
|
||||
@@ -490,8 +483,6 @@ DEFINE_I440FX_MACHINE(v6_2, "pc-i440fx-6.2", NULL,
|
||||
static void pc_i440fx_6_1_machine_options(MachineClass *m)
|
||||
{
|
||||
pc_i440fx_6_2_machine_options(m);
|
||||
m->alias = NULL;
|
||||
m->is_default = false;
|
||||
compat_props_add(m->compat_props, hw_compat_6_1, hw_compat_6_1_len);
|
||||
compat_props_add(m->compat_props, pc_compat_6_1, pc_compat_6_1_len);
|
||||
m->smp_props.prefer_sockets = true;
|
||||
@@ -503,8 +494,6 @@ DEFINE_I440FX_MACHINE(v6_1, "pc-i440fx-6.1", NULL,
|
||||
static void pc_i440fx_6_0_machine_options(MachineClass *m)
|
||||
{
|
||||
pc_i440fx_6_1_machine_options(m);
|
||||
m->alias = NULL;
|
||||
m->is_default = false;
|
||||
compat_props_add(m->compat_props, hw_compat_6_0, hw_compat_6_0_len);
|
||||
compat_props_add(m->compat_props, pc_compat_6_0, pc_compat_6_0_len);
|
||||
}
|
||||
@@ -515,8 +504,6 @@ DEFINE_I440FX_MACHINE(v6_0, "pc-i440fx-6.0", NULL,
|
||||
static void pc_i440fx_5_2_machine_options(MachineClass *m)
|
||||
{
|
||||
pc_i440fx_6_0_machine_options(m);
|
||||
m->alias = NULL;
|
||||
m->is_default = false;
|
||||
compat_props_add(m->compat_props, hw_compat_5_2, hw_compat_5_2_len);
|
||||
compat_props_add(m->compat_props, pc_compat_5_2, pc_compat_5_2_len);
|
||||
}
|
||||
@@ -529,8 +516,6 @@ static void pc_i440fx_5_1_machine_options(MachineClass *m)
|
||||
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
|
||||
|
||||
pc_i440fx_5_2_machine_options(m);
|
||||
m->alias = NULL;
|
||||
m->is_default = false;
|
||||
compat_props_add(m->compat_props, hw_compat_5_1, hw_compat_5_1_len);
|
||||
compat_props_add(m->compat_props, pc_compat_5_1, pc_compat_5_1_len);
|
||||
pcmc->kvmclock_create_always = false;
|
||||
@@ -543,8 +528,6 @@ DEFINE_I440FX_MACHINE(v5_1, "pc-i440fx-5.1", NULL,
|
||||
static void pc_i440fx_5_0_machine_options(MachineClass *m)
|
||||
{
|
||||
pc_i440fx_5_1_machine_options(m);
|
||||
m->alias = NULL;
|
||||
m->is_default = false;
|
||||
m->numa_mem_supported = true;
|
||||
compat_props_add(m->compat_props, hw_compat_5_0, hw_compat_5_0_len);
|
||||
compat_props_add(m->compat_props, pc_compat_5_0, pc_compat_5_0_len);
|
||||
@@ -557,8 +540,6 @@ DEFINE_I440FX_MACHINE(v5_0, "pc-i440fx-5.0", NULL,
|
||||
static void pc_i440fx_4_2_machine_options(MachineClass *m)
|
||||
{
|
||||
pc_i440fx_5_0_machine_options(m);
|
||||
m->alias = NULL;
|
||||
m->is_default = false;
|
||||
compat_props_add(m->compat_props, hw_compat_4_2, hw_compat_4_2_len);
|
||||
compat_props_add(m->compat_props, pc_compat_4_2, pc_compat_4_2_len);
|
||||
}
|
||||
@@ -569,8 +550,6 @@ DEFINE_I440FX_MACHINE(v4_2, "pc-i440fx-4.2", NULL,
|
||||
static void pc_i440fx_4_1_machine_options(MachineClass *m)
|
||||
{
|
||||
pc_i440fx_4_2_machine_options(m);
|
||||
m->alias = NULL;
|
||||
m->is_default = false;
|
||||
compat_props_add(m->compat_props, hw_compat_4_1, hw_compat_4_1_len);
|
||||
compat_props_add(m->compat_props, pc_compat_4_1, pc_compat_4_1_len);
|
||||
}
|
||||
@@ -582,8 +561,6 @@ static void pc_i440fx_4_0_machine_options(MachineClass *m)
|
||||
{
|
||||
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
|
||||
pc_i440fx_4_1_machine_options(m);
|
||||
m->alias = NULL;
|
||||
m->is_default = false;
|
||||
pcmc->default_cpu_version = CPU_VERSION_LEGACY;
|
||||
compat_props_add(m->compat_props, hw_compat_4_0, hw_compat_4_0_len);
|
||||
compat_props_add(m->compat_props, pc_compat_4_0, pc_compat_4_0_len);
|
||||
@@ -597,9 +574,7 @@ static void pc_i440fx_3_1_machine_options(MachineClass *m)
|
||||
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
|
||||
|
||||
pc_i440fx_4_0_machine_options(m);
|
||||
m->is_default = false;
|
||||
m->smbus_no_migration_support = true;
|
||||
m->alias = NULL;
|
||||
pcmc->pvh_enabled = false;
|
||||
compat_props_add(m->compat_props, hw_compat_3_1, hw_compat_3_1_len);
|
||||
compat_props_add(m->compat_props, pc_compat_3_1, pc_compat_3_1_len);
|
||||
|
@@ -355,6 +355,7 @@ static void pc_q35_machine_options(MachineClass *m)
|
||||
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
|
||||
pcmc->default_nic_model = "e1000e";
|
||||
pcmc->pci_root_uid = 0;
|
||||
pcmc->default_cpu_version = 1;
|
||||
|
||||
m->family = "pc_q35";
|
||||
m->desc = "Standard PC (Q35 + ICH9, 2009)";
|
||||
@@ -372,10 +373,8 @@ static void pc_q35_machine_options(MachineClass *m)
|
||||
|
||||
static void pc_q35_7_2_machine_options(MachineClass *m)
|
||||
{
|
||||
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
|
||||
pc_q35_machine_options(m);
|
||||
m->alias = "q35";
|
||||
pcmc->default_cpu_version = 1;
|
||||
}
|
||||
|
||||
DEFINE_Q35_MACHINE(v7_2, "pc-q35-7.2", NULL,
|
||||
@@ -383,10 +382,8 @@ DEFINE_Q35_MACHINE(v7_2, "pc-q35-7.2", NULL,
|
||||
|
||||
static void pc_q35_7_1_machine_options(MachineClass *m)
|
||||
{
|
||||
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
|
||||
pc_q35_7_2_machine_options(m);
|
||||
m->alias = NULL;
|
||||
pcmc->legacy_no_rng_seed = true;
|
||||
compat_props_add(m->compat_props, hw_compat_7_1, hw_compat_7_1_len);
|
||||
compat_props_add(m->compat_props, pc_compat_7_1, pc_compat_7_1_len);
|
||||
}
|
||||
@@ -398,7 +395,6 @@ static void pc_q35_7_0_machine_options(MachineClass *m)
|
||||
{
|
||||
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
|
||||
pc_q35_7_1_machine_options(m);
|
||||
m->alias = NULL;
|
||||
pcmc->enforce_amd_1tb_hole = false;
|
||||
compat_props_add(m->compat_props, hw_compat_7_0, hw_compat_7_0_len);
|
||||
compat_props_add(m->compat_props, pc_compat_7_0, pc_compat_7_0_len);
|
||||
@@ -410,7 +406,6 @@ DEFINE_Q35_MACHINE(v7_0, "pc-q35-7.0", NULL,
|
||||
static void pc_q35_6_2_machine_options(MachineClass *m)
|
||||
{
|
||||
pc_q35_7_0_machine_options(m);
|
||||
m->alias = NULL;
|
||||
compat_props_add(m->compat_props, hw_compat_6_2, hw_compat_6_2_len);
|
||||
compat_props_add(m->compat_props, pc_compat_6_2, pc_compat_6_2_len);
|
||||
}
|
||||
@@ -421,7 +416,6 @@ DEFINE_Q35_MACHINE(v6_2, "pc-q35-6.2", NULL,
|
||||
static void pc_q35_6_1_machine_options(MachineClass *m)
|
||||
{
|
||||
pc_q35_6_2_machine_options(m);
|
||||
m->alias = NULL;
|
||||
compat_props_add(m->compat_props, hw_compat_6_1, hw_compat_6_1_len);
|
||||
compat_props_add(m->compat_props, pc_compat_6_1, pc_compat_6_1_len);
|
||||
m->smp_props.prefer_sockets = true;
|
||||
@@ -433,7 +427,6 @@ DEFINE_Q35_MACHINE(v6_1, "pc-q35-6.1", NULL,
|
||||
static void pc_q35_6_0_machine_options(MachineClass *m)
|
||||
{
|
||||
pc_q35_6_1_machine_options(m);
|
||||
m->alias = NULL;
|
||||
compat_props_add(m->compat_props, hw_compat_6_0, hw_compat_6_0_len);
|
||||
compat_props_add(m->compat_props, pc_compat_6_0, pc_compat_6_0_len);
|
||||
}
|
||||
@@ -444,7 +437,6 @@ DEFINE_Q35_MACHINE(v6_0, "pc-q35-6.0", NULL,
|
||||
static void pc_q35_5_2_machine_options(MachineClass *m)
|
||||
{
|
||||
pc_q35_6_0_machine_options(m);
|
||||
m->alias = NULL;
|
||||
compat_props_add(m->compat_props, hw_compat_5_2, hw_compat_5_2_len);
|
||||
compat_props_add(m->compat_props, pc_compat_5_2, pc_compat_5_2_len);
|
||||
}
|
||||
@@ -457,7 +449,6 @@ static void pc_q35_5_1_machine_options(MachineClass *m)
|
||||
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
|
||||
|
||||
pc_q35_5_2_machine_options(m);
|
||||
m->alias = NULL;
|
||||
compat_props_add(m->compat_props, hw_compat_5_1, hw_compat_5_1_len);
|
||||
compat_props_add(m->compat_props, pc_compat_5_1, pc_compat_5_1_len);
|
||||
pcmc->kvmclock_create_always = false;
|
||||
@@ -470,7 +461,6 @@ DEFINE_Q35_MACHINE(v5_1, "pc-q35-5.1", NULL,
|
||||
static void pc_q35_5_0_machine_options(MachineClass *m)
|
||||
{
|
||||
pc_q35_5_1_machine_options(m);
|
||||
m->alias = NULL;
|
||||
m->numa_mem_supported = true;
|
||||
compat_props_add(m->compat_props, hw_compat_5_0, hw_compat_5_0_len);
|
||||
compat_props_add(m->compat_props, pc_compat_5_0, pc_compat_5_0_len);
|
||||
@@ -483,7 +473,6 @@ DEFINE_Q35_MACHINE(v5_0, "pc-q35-5.0", NULL,
|
||||
static void pc_q35_4_2_machine_options(MachineClass *m)
|
||||
{
|
||||
pc_q35_5_0_machine_options(m);
|
||||
m->alias = NULL;
|
||||
compat_props_add(m->compat_props, hw_compat_4_2, hw_compat_4_2_len);
|
||||
compat_props_add(m->compat_props, pc_compat_4_2, pc_compat_4_2_len);
|
||||
}
|
||||
@@ -494,7 +483,6 @@ DEFINE_Q35_MACHINE(v4_2, "pc-q35-4.2", NULL,
|
||||
static void pc_q35_4_1_machine_options(MachineClass *m)
|
||||
{
|
||||
pc_q35_4_2_machine_options(m);
|
||||
m->alias = NULL;
|
||||
compat_props_add(m->compat_props, hw_compat_4_1, hw_compat_4_1_len);
|
||||
compat_props_add(m->compat_props, pc_compat_4_1, pc_compat_4_1_len);
|
||||
}
|
||||
@@ -506,7 +494,6 @@ static void pc_q35_4_0_1_machine_options(MachineClass *m)
|
||||
{
|
||||
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
|
||||
pc_q35_4_1_machine_options(m);
|
||||
m->alias = NULL;
|
||||
pcmc->default_cpu_version = CPU_VERSION_LEGACY;
|
||||
/*
|
||||
* This is the default machine for the 4.0-stable branch. It is basically
|
||||
@@ -524,7 +511,6 @@ static void pc_q35_4_0_machine_options(MachineClass *m)
|
||||
{
|
||||
pc_q35_4_0_1_machine_options(m);
|
||||
m->default_kernel_irqchip_split = true;
|
||||
m->alias = NULL;
|
||||
/* Compat props are applied by the 4.0.1 machine */
|
||||
}
|
||||
|
||||
@@ -538,7 +524,6 @@ static void pc_q35_3_1_machine_options(MachineClass *m)
|
||||
pc_q35_4_0_machine_options(m);
|
||||
m->default_kernel_irqchip_split = false;
|
||||
m->smbus_no_migration_support = true;
|
||||
m->alias = NULL;
|
||||
pcmc->pvh_enabled = false;
|
||||
compat_props_add(m->compat_props, hw_compat_3_1, hw_compat_3_1_len);
|
||||
compat_props_add(m->compat_props, pc_compat_3_1, pc_compat_3_1_len);
|
||||
|
@@ -37,6 +37,7 @@
|
||||
#include "hw/block/flash.h"
|
||||
#include "sysemu/kvm.h"
|
||||
#include "sev.h"
|
||||
#include "kvm/tdx.h"
|
||||
|
||||
#define FLASH_SECTOR_SIZE 4096
|
||||
|
||||
@@ -265,5 +266,11 @@ void x86_firmware_configure(void *ptr, int size)
|
||||
}
|
||||
|
||||
sev_encrypt_flash(ptr, size, &error_fatal);
|
||||
} else if (is_tdx_vm()) {
|
||||
ret = tdx_parse_tdvf(ptr, size);
|
||||
if (ret) {
|
||||
error_report("failed to parse TDVF for TDX VM");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
162
hw/i386/tdvf-hob.c
Normal file
162
hw/i386/tdvf-hob.c
Normal file
@@ -0,0 +1,162 @@
|
||||
/*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
* Copyright (c) 2020 Intel Corporation
|
||||
* Author: Isaku Yamahata <isaku.yamahata at gmail.com>
|
||||
* <isaku.yamahata at intel.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with this program; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "qemu/log.h"
|
||||
#include "e820_memory_layout.h"
|
||||
#include "hw/i386/pc.h"
|
||||
#include "hw/i386/x86.h"
|
||||
#include "hw/pci/pcie_host.h"
|
||||
#include "sysemu/kvm.h"
|
||||
#include "standard-headers/uefi/uefi.h"
|
||||
#include "tdvf-hob.h"
|
||||
|
||||
typedef struct TdvfHob {
|
||||
hwaddr hob_addr;
|
||||
void *ptr;
|
||||
int size;
|
||||
|
||||
/* working area */
|
||||
void *current;
|
||||
void *end;
|
||||
} TdvfHob;
|
||||
|
||||
static uint64_t tdvf_current_guest_addr(const TdvfHob *hob)
|
||||
{
|
||||
return hob->hob_addr + (hob->current - hob->ptr);
|
||||
}
|
||||
|
||||
static void tdvf_align(TdvfHob *hob, size_t align)
|
||||
{
|
||||
hob->current = QEMU_ALIGN_PTR_UP(hob->current, align);
|
||||
}
|
||||
|
||||
static void *tdvf_get_area(TdvfHob *hob, uint64_t size)
|
||||
{
|
||||
void *ret;
|
||||
|
||||
if (hob->current + size > hob->end) {
|
||||
error_report("TD_HOB overrun, size = 0x%" PRIx64, size);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
ret = hob->current;
|
||||
hob->current += size;
|
||||
tdvf_align(hob, 8);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void tdvf_hob_add_memory_resources(TdxGuest *tdx, TdvfHob *hob)
|
||||
{
|
||||
EFI_HOB_RESOURCE_DESCRIPTOR *region;
|
||||
EFI_RESOURCE_ATTRIBUTE_TYPE attr;
|
||||
EFI_RESOURCE_TYPE resource_type;
|
||||
|
||||
TdxRamEntry *e;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < tdx->nr_ram_entries; i++) {
|
||||
e = &tdx->ram_entries[i];
|
||||
|
||||
if (e->type == TDX_RAM_UNACCEPTED) {
|
||||
resource_type = EFI_RESOURCE_MEMORY_UNACCEPTED;
|
||||
attr = EFI_RESOURCE_ATTRIBUTE_TDVF_UNACCEPTED;
|
||||
} else if (e->type == TDX_RAM_ADDED){
|
||||
resource_type = EFI_RESOURCE_SYSTEM_MEMORY;
|
||||
attr = EFI_RESOURCE_ATTRIBUTE_TDVF_PRIVATE;
|
||||
} else {
|
||||
error_report("unknown TDX_RAM_ENTRY type %d", e->type);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* REVERTME: workaround for the old version of TDVF expectations. */
|
||||
if (!tdx->tdvf.guid_found) {
|
||||
switch (e->type) {
|
||||
case TDX_RAM_UNACCEPTED:
|
||||
resource_type = EFI_RESOURCE_SYSTEM_MEMORY;
|
||||
break;
|
||||
case TDX_RAM_ADDED:
|
||||
resource_type = EFI_RESOURCE_MEMORY_RESERVED;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
region = tdvf_get_area(hob, sizeof(*region));
|
||||
*region = (EFI_HOB_RESOURCE_DESCRIPTOR) {
|
||||
.Header = {
|
||||
.HobType = EFI_HOB_TYPE_RESOURCE_DESCRIPTOR,
|
||||
.HobLength = cpu_to_le16(sizeof(*region)),
|
||||
.Reserved = cpu_to_le32(0),
|
||||
},
|
||||
.Owner = EFI_HOB_OWNER_ZERO,
|
||||
.ResourceType = cpu_to_le32(resource_type),
|
||||
.ResourceAttribute = cpu_to_le32(attr),
|
||||
.PhysicalStart = cpu_to_le64(e->address),
|
||||
.ResourceLength = cpu_to_le64(e->length),
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
void tdvf_hob_create(TdxGuest *tdx, TdxFirmwareEntry *td_hob)
|
||||
{
|
||||
TdvfHob hob;
|
||||
EFI_HOB_GENERIC_HEADER *last_hob;
|
||||
EFI_HOB_HANDOFF_INFO_TABLE *hit;
|
||||
|
||||
if (!td_hob) {
|
||||
return;
|
||||
}
|
||||
|
||||
hob.hob_addr = td_hob->address,
|
||||
hob.size = td_hob->size,
|
||||
hob.ptr = td_hob->mem_ptr,
|
||||
hob.current = td_hob->mem_ptr,
|
||||
hob.end = td_hob->mem_ptr + td_hob->size,
|
||||
|
||||
/* Note, Efi{Free}Memory{Bottom,Top} are ignored, leave 'em zeroed. */
|
||||
hit = tdvf_get_area(&hob, sizeof(*hit));
|
||||
*hit = (EFI_HOB_HANDOFF_INFO_TABLE) {
|
||||
.Header = {
|
||||
.HobType = EFI_HOB_TYPE_HANDOFF,
|
||||
.HobLength = cpu_to_le16(sizeof(*hit)),
|
||||
.Reserved = cpu_to_le32(0),
|
||||
},
|
||||
.Version = cpu_to_le32(EFI_HOB_HANDOFF_TABLE_VERSION),
|
||||
.BootMode = cpu_to_le32(0),
|
||||
.EfiMemoryTop = cpu_to_le64(0),
|
||||
.EfiMemoryBottom = cpu_to_le64(0),
|
||||
.EfiFreeMemoryTop = cpu_to_le64(0),
|
||||
.EfiFreeMemoryBottom = cpu_to_le64(0),
|
||||
.EfiEndOfHobList = cpu_to_le64(0), /* initialized later */
|
||||
};
|
||||
|
||||
tdvf_hob_add_memory_resources(tdx, &hob);
|
||||
|
||||
last_hob = tdvf_get_area(&hob, sizeof(*last_hob));
|
||||
*last_hob = (EFI_HOB_GENERIC_HEADER) {
|
||||
.HobType = EFI_HOB_TYPE_END_OF_HOB_LIST,
|
||||
.HobLength = cpu_to_le16(sizeof(*last_hob)),
|
||||
.Reserved = cpu_to_le32(0),
|
||||
};
|
||||
hit->EfiEndOfHobList = tdvf_current_guest_addr(&hob);
|
||||
}
|
24
hw/i386/tdvf-hob.h
Normal file
24
hw/i386/tdvf-hob.h
Normal file
@@ -0,0 +1,24 @@
|
||||
#ifndef HW_I386_TD_HOB_H
|
||||
#define HW_I386_TD_HOB_H
|
||||
|
||||
#include "hw/i386/tdvf.h"
|
||||
#include "target/i386/kvm/tdx.h"
|
||||
|
||||
void tdvf_hob_create(TdxGuest *tdx, TdxFirmwareEntry *td_hob);
|
||||
|
||||
#define EFI_RESOURCE_ATTRIBUTE_TDVF_PRIVATE \
|
||||
(EFI_RESOURCE_ATTRIBUTE_PRESENT | \
|
||||
EFI_RESOURCE_ATTRIBUTE_INITIALIZED | \
|
||||
EFI_RESOURCE_ATTRIBUTE_TESTED)
|
||||
|
||||
#define EFI_RESOURCE_ATTRIBUTE_TDVF_UNACCEPTED \
|
||||
(EFI_RESOURCE_ATTRIBUTE_PRESENT | \
|
||||
EFI_RESOURCE_ATTRIBUTE_INITIALIZED | \
|
||||
EFI_RESOURCE_ATTRIBUTE_TESTED)
|
||||
|
||||
#define EFI_RESOURCE_ATTRIBUTE_TDVF_MMIO \
|
||||
(EFI_RESOURCE_ATTRIBUTE_PRESENT | \
|
||||
EFI_RESOURCE_ATTRIBUTE_INITIALIZED | \
|
||||
EFI_RESOURCE_ATTRIBUTE_UNCACHEABLE)
|
||||
|
||||
#endif
|
214
hw/i386/tdvf.c
Normal file
214
hw/i386/tdvf.c
Normal file
@@ -0,0 +1,214 @@
|
||||
/*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
* Copyright (c) 2020 Intel Corporation
|
||||
* Author: Isaku Yamahata <isaku.yamahata at gmail.com>
|
||||
* <isaku.yamahata at intel.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with this program; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "hw/i386/pc.h"
|
||||
#include "hw/i386/tdvf.h"
|
||||
#include "sysemu/kvm.h"
|
||||
|
||||
#define TDX_METADATA_OFFSET_GUID "e47a6535-984a-4798-865e-4685a7bf8ec2"
|
||||
#define TDX_METADATA_VERSION 1
|
||||
#define TDVF_SIGNATURE 0x46564454 /* TDVF as little endian */
|
||||
|
||||
typedef struct {
|
||||
uint32_t DataOffset;
|
||||
uint32_t RawDataSize;
|
||||
uint64_t MemoryAddress;
|
||||
uint64_t MemoryDataSize;
|
||||
uint32_t Type;
|
||||
uint32_t Attributes;
|
||||
} TdvfSectionEntry;
|
||||
|
||||
typedef struct {
|
||||
uint32_t Signature;
|
||||
uint32_t Length;
|
||||
uint32_t Version;
|
||||
uint32_t NumberOfSectionEntries;
|
||||
TdvfSectionEntry SectionEntries[];
|
||||
} TdvfMetadata;
|
||||
|
||||
struct tdx_metadata_offset {
|
||||
uint32_t offset;
|
||||
};
|
||||
|
||||
/*
 * Locate and validate the TDVF metadata header inside a firmware image.
 *
 * Preferred lookup: the GUID-ed table entry (TDX_METADATA_OFFSET_GUID)
 * present in upstream TDVF builds, which stores the metadata offset
 * counted from the end of the image.  Legacy images instead keep a raw
 * 4-byte offset located 0x20 bytes before the end of the image.
 *
 * @fw: firmware descriptor; fw->guid_found records which lookup path
 *      succeeded.
 * @flash_ptr: start of the memory-mapped firmware image.
 * @size: image size in bytes.
 *
 * Returns a pointer into @flash_ptr at the validated metadata header, or
 * NULL if no valid TDVF metadata is found.  Side effect: the Signature,
 * Length and Version fields of the header are converted to host byte
 * order in place, so callers must not le32_to_cpu() them again.
 */
static TdvfMetadata *tdvf_get_metadata(TdxFirmware *fw, void *flash_ptr, int size)
{
    TdvfMetadata *metadata;
    uint32_t offset = 0;
    uint8_t *data;

    /* Reject sizes that do not round-trip through uint32_t. */
    if ((uint32_t) size != size) {
        return NULL;
    }

    if (pc_system_ovmf_table_find(TDX_METADATA_OFFSET_GUID, &data, NULL)) {
        fw->guid_found = true;

        /*
         * The GUID-ed entry holds the offset from the end of the image.
         * A bogus (too large) value makes the subtraction wrap to a huge
         * uint32_t, which the bounds check below rejects.
         */
        offset = size - le32_to_cpu(((struct tdx_metadata_offset *)data)->offset);
        if (offset + sizeof(*metadata) > size) {
            return NULL;
        }
    } else {
        error_report("Cannot find TDX_METADATA_OFFSET_GUID");
        warn_report("==============================================================");
        warn_report("!!! Warning: Please upgrade to upstream version TDVF !!!");
        warn_report("!!! Old version will be deprecated soon !!!");
        warn_report("==============================================================");
        fw->guid_found = false;

#define TDVF_METDATA_OFFSET_FROM_END 0x20
        /*
         * Legacy layout: a 4-byte metadata offset lives 0x20 bytes before
         * the end of the image.  Guard the subtraction so a tiny image
         * cannot wrap offset and make us read outside the mapping.
         */
        if (size < TDVF_METDATA_OFFSET_FROM_END) {
            return NULL;
        }
        offset = size - TDVF_METDATA_OFFSET_FROM_END;
        uint32_t *metadata_offset = (uint32_t *)(flash_ptr + offset);
        offset = le32_to_cpu(*metadata_offset);

        if (offset + sizeof(*metadata) > size) {
            return NULL;
        }
    }

    metadata = flash_ptr + offset;

    /* Finally, verify the signature to determine if this is a TDVF image. */
    metadata->Signature = le32_to_cpu(metadata->Signature);
    if (metadata->Signature != TDVF_SIGNATURE) {
        error_report("Invalid TDVF signature in metadata!");
        return NULL;
    }

    /*
     * Sanity check that the TDVF doesn't overlap its own metadata.
     * Widen to 64 bits: offset + Length is uint32 + uint32 and could
     * otherwise wrap and slip past the comparison.
     */
    metadata->Length = le32_to_cpu(metadata->Length);
    if ((uint64_t)offset + metadata->Length > (uint64_t)size) {
        return NULL;
    }

    /* Only version 1 is supported/defined. */
    metadata->Version = le32_to_cpu(metadata->Version);
    if (metadata->Version != TDX_METADATA_VERSION) {
        return NULL;
    }

    return metadata;
}
|
||||
|
||||
static int tdvf_parse_and_check_section_entry(const TdvfSectionEntry *src,
|
||||
TdxFirmwareEntry *entry)
|
||||
{
|
||||
entry->data_offset = le32_to_cpu(src->DataOffset);
|
||||
entry->data_len = le32_to_cpu(src->RawDataSize);
|
||||
entry->address = le64_to_cpu(src->MemoryAddress);
|
||||
entry->size = le64_to_cpu(src->MemoryDataSize);
|
||||
entry->type = le32_to_cpu(src->Type);
|
||||
entry->attributes = le32_to_cpu(src->Attributes);
|
||||
|
||||
/* sanity check */
|
||||
if (entry->size < entry->data_len) {
|
||||
error_report("Broken metadata RawDataSize 0x%x MemoryDataSize 0x%lx",
|
||||
entry->data_len, entry->size);
|
||||
return -1;
|
||||
}
|
||||
if (!QEMU_IS_ALIGNED(entry->address, TARGET_PAGE_SIZE)) {
|
||||
error_report("MemoryAddress 0x%lx not page aligned", entry->address);
|
||||
return -1;
|
||||
}
|
||||
if (!QEMU_IS_ALIGNED(entry->size, TARGET_PAGE_SIZE)) {
|
||||
error_report("MemoryDataSize 0x%lx not page aligned", entry->size);
|
||||
return -1;
|
||||
}
|
||||
|
||||
switch (entry->type) {
|
||||
case TDVF_SECTION_TYPE_BFV:
|
||||
case TDVF_SECTION_TYPE_CFV:
|
||||
case TDVF_SECTION_TYPE_PAYLOAD:
|
||||
/* The sections that must be copied from firmware image to TD memory */
|
||||
if (entry->data_len == 0) {
|
||||
error_report("%d section with RawDataSize == 0", entry->type);
|
||||
return -1;
|
||||
}
|
||||
break;
|
||||
case TDVF_SECTION_TYPE_TD_HOB:
|
||||
case TDVF_SECTION_TYPE_TEMP_MEM:
|
||||
case TDVF_SECTION_TYPE_PERM_MEM:
|
||||
/* The sections that no need to be copied from firmware image */
|
||||
if (entry->data_len != 0) {
|
||||
error_report("%d section with RawDataSize 0x%x != 0",
|
||||
entry->type, entry->data_len);
|
||||
return -1;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
error_report("TDVF contains unsupported section type %d", entry->type);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Parse TDVF metadata out of a memory-mapped firmware image.
 *
 * Locates the metadata header via tdvf_get_metadata(), validates the
 * section table size against the declared Length, then converts and
 * sanity-checks every section entry into fw->entries.
 *
 * @fw: output firmware descriptor; on success fw->entries/nr_entries
 *      describe the sections and fw->mem_ptr points at @flash_ptr.
 * @flash_ptr: start of the mapped firmware image (kept referenced by fw).
 * @size: image size in bytes.
 *
 * Returns 0 on success, -EINVAL on any parse/validation failure (in
 * which case fw->entries is freed and reset).
 */
int tdvf_parse_metadata(TdxFirmware *fw, void *flash_ptr, int size)
{
    TdvfSectionEntry *sections = NULL;
    TdvfMetadata *metadata;
    ssize_t entries_size;
    uint32_t len, i;

    metadata = tdvf_get_metadata(fw, flash_ptr, size);
    if (!metadata) {
        return -EINVAL;
    }

    /* Load and parse metadata entries. */
    fw->nr_entries = le32_to_cpu(metadata->NumberOfSectionEntries);
    if (fw->nr_entries < 2) {
        error_report("Invalid number of fw entries (%u) in TDVF", fw->nr_entries);
        return -EINVAL;
    }

    /*
     * metadata->Length was already converted to host byte order by
     * tdvf_get_metadata(); converting it again here would corrupt the
     * value on big-endian hosts.
     */
    len = metadata->Length;
    entries_size = fw->nr_entries * sizeof(TdvfSectionEntry);
    if (len != sizeof(*metadata) + entries_size) {
        error_report("TDVF metadata len (0x%x) mismatch, expected (0x%x)",
                     len, (uint32_t)(sizeof(*metadata) + entries_size));
        return -EINVAL;
    }

    fw->entries = g_new(TdxFirmwareEntry, fw->nr_entries);
    sections = g_new(TdvfSectionEntry, fw->nr_entries);

    /*
     * Copy the section table out of the image.  memcpy() cannot fail
     * (it never returns NULL), so no result check is needed here.
     */
    memcpy(sections, (void *)metadata + sizeof(*metadata), entries_size);

    for (i = 0; i < fw->nr_entries; i++) {
        if (tdvf_parse_and_check_section_entry(&sections[i], &fw->entries[i])) {
            goto err;
        }
    }
    g_free(sections);

    fw->mem_ptr = flash_ptr;
    return 0;

err:
    g_free(sections);
    /* Free the partially-filled entry array before clearing the pointer. */
    g_free(fw->entries);
    fw->entries = NULL;
    fw->nr_entries = 0;
    return -EINVAL;
}
|
133
hw/i386/x86.c
133
hw/i386/x86.c
@@ -26,7 +26,7 @@
|
||||
#include "qemu/cutils.h"
|
||||
#include "qemu/units.h"
|
||||
#include "qemu/datadir.h"
|
||||
#include "qemu/guest-random.h"
|
||||
#include "qom/object_interfaces.h"
|
||||
#include "qapi/error.h"
|
||||
#include "qapi/qmp/qerror.h"
|
||||
#include "qapi/qapi-visit-common.h"
|
||||
@@ -37,7 +37,6 @@
|
||||
#include "sysemu/whpx.h"
|
||||
#include "sysemu/numa.h"
|
||||
#include "sysemu/replay.h"
|
||||
#include "sysemu/reset.h"
|
||||
#include "sysemu/sysemu.h"
|
||||
#include "sysemu/cpu-timers.h"
|
||||
#include "sysemu/xen.h"
|
||||
@@ -50,6 +49,7 @@
|
||||
#include "hw/intc/i8259.h"
|
||||
#include "hw/rtc/mc146818rtc.h"
|
||||
#include "target/i386/sev.h"
|
||||
#include "kvm/tdx.h"
|
||||
|
||||
#include "hw/acpi/cpu_hotplug.h"
|
||||
#include "hw/irq.h"
|
||||
@@ -657,12 +657,12 @@ DeviceState *ioapic_init_secondary(GSIState *gsi_state)
|
||||
return dev;
|
||||
}
|
||||
|
||||
typedef struct SetupData {
|
||||
struct setup_data {
|
||||
uint64_t next;
|
||||
uint32_t type;
|
||||
uint32_t len;
|
||||
uint8_t data[];
|
||||
} __attribute__((packed)) SetupData;
|
||||
} __attribute__((packed));
|
||||
|
||||
|
||||
/*
|
||||
@@ -769,35 +769,10 @@ static bool load_elfboot(const char *kernel_filename,
|
||||
return true;
|
||||
}
|
||||
|
||||
typedef struct SetupDataFixup {
|
||||
void *pos;
|
||||
hwaddr orig_val, new_val;
|
||||
uint32_t addr;
|
||||
} SetupDataFixup;
|
||||
|
||||
static void fixup_setup_data(void *opaque)
|
||||
{
|
||||
SetupDataFixup *fixup = opaque;
|
||||
stq_p(fixup->pos, fixup->new_val);
|
||||
}
|
||||
|
||||
static void reset_setup_data(void *opaque)
|
||||
{
|
||||
SetupDataFixup *fixup = opaque;
|
||||
stq_p(fixup->pos, fixup->orig_val);
|
||||
}
|
||||
|
||||
static void reset_rng_seed(void *opaque)
|
||||
{
|
||||
SetupData *setup_data = opaque;
|
||||
qemu_guest_getrandom_nofail(setup_data->data, le32_to_cpu(setup_data->len));
|
||||
}
|
||||
|
||||
void x86_load_linux(X86MachineState *x86ms,
|
||||
FWCfgState *fw_cfg,
|
||||
int acpi_data_size,
|
||||
bool pvh_enabled,
|
||||
bool legacy_no_rng_seed)
|
||||
bool pvh_enabled)
|
||||
{
|
||||
bool linuxboot_dma_enabled = X86_MACHINE_GET_CLASS(x86ms)->fwcfg_dma_enabled;
|
||||
uint16_t protocol;
|
||||
@@ -805,17 +780,16 @@ void x86_load_linux(X86MachineState *x86ms,
|
||||
int dtb_size, setup_data_offset;
|
||||
uint32_t initrd_max;
|
||||
uint8_t header[8192], *setup, *kernel;
|
||||
hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0, first_setup_data = 0;
|
||||
hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0;
|
||||
FILE *f;
|
||||
char *vmode;
|
||||
MachineState *machine = MACHINE(x86ms);
|
||||
SetupData *setup_data;
|
||||
struct setup_data *setup_data;
|
||||
const char *kernel_filename = machine->kernel_filename;
|
||||
const char *initrd_filename = machine->initrd_filename;
|
||||
const char *dtb_filename = machine->dtb;
|
||||
const char *kernel_cmdline = machine->kernel_cmdline;
|
||||
SevKernelLoaderContext sev_load_ctx = {};
|
||||
enum { RNG_SEED_LENGTH = 32 };
|
||||
|
||||
/* Align to 16 bytes as a paranoia measure */
|
||||
cmdline_size = (strlen(kernel_cmdline) + 16) & ~15;
|
||||
@@ -1092,41 +1066,19 @@ void x86_load_linux(X86MachineState *x86ms,
|
||||
}
|
||||
|
||||
setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16);
|
||||
kernel_size = setup_data_offset + sizeof(SetupData) + dtb_size;
|
||||
kernel_size = setup_data_offset + sizeof(struct setup_data) + dtb_size;
|
||||
kernel = g_realloc(kernel, kernel_size);
|
||||
|
||||
stq_p(header + 0x250, prot_addr + setup_data_offset);
|
||||
|
||||
setup_data = (SetupData *)(kernel + setup_data_offset);
|
||||
setup_data->next = cpu_to_le64(first_setup_data);
|
||||
first_setup_data = prot_addr + setup_data_offset;
|
||||
setup_data = (struct setup_data *)(kernel + setup_data_offset);
|
||||
setup_data->next = 0;
|
||||
setup_data->type = cpu_to_le32(SETUP_DTB);
|
||||
setup_data->len = cpu_to_le32(dtb_size);
|
||||
|
||||
load_image_size(dtb_filename, setup_data->data, dtb_size);
|
||||
}
|
||||
|
||||
if (!legacy_no_rng_seed) {
|
||||
setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16);
|
||||
kernel_size = setup_data_offset + sizeof(SetupData) + RNG_SEED_LENGTH;
|
||||
kernel = g_realloc(kernel, kernel_size);
|
||||
setup_data = (SetupData *)(kernel + setup_data_offset);
|
||||
setup_data->next = cpu_to_le64(first_setup_data);
|
||||
first_setup_data = prot_addr + setup_data_offset;
|
||||
setup_data->type = cpu_to_le32(SETUP_RNG_SEED);
|
||||
setup_data->len = cpu_to_le32(RNG_SEED_LENGTH);
|
||||
qemu_guest_getrandom_nofail(setup_data->data, RNG_SEED_LENGTH);
|
||||
qemu_register_reset_nosnapshotload(reset_rng_seed, setup_data);
|
||||
fw_cfg_add_bytes_callback(fw_cfg, FW_CFG_KERNEL_DATA, reset_rng_seed, NULL,
|
||||
setup_data, kernel, kernel_size, true);
|
||||
} else {
|
||||
fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size);
|
||||
}
|
||||
|
||||
fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr);
|
||||
fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size);
|
||||
sev_load_ctx.kernel_data = (char *)kernel;
|
||||
sev_load_ctx.kernel_size = kernel_size;
|
||||
|
||||
/*
|
||||
* If we're starting an encrypted VM, it will be OVMF based, which uses the
|
||||
* efi stub for booting and doesn't require any values to be placed in the
|
||||
@@ -1135,20 +1087,16 @@ void x86_load_linux(X86MachineState *x86ms,
|
||||
* file the user passed in.
|
||||
*/
|
||||
if (!sev_enabled()) {
|
||||
SetupDataFixup *fixup = g_malloc(sizeof(*fixup));
|
||||
|
||||
memcpy(setup, header, MIN(sizeof(header), setup_size));
|
||||
/* Offset 0x250 is a pointer to the first setup_data link. */
|
||||
fixup->pos = setup + 0x250;
|
||||
fixup->orig_val = ldq_p(fixup->pos);
|
||||
fixup->new_val = first_setup_data;
|
||||
fixup->addr = cpu_to_le32(real_addr);
|
||||
fw_cfg_add_bytes_callback(fw_cfg, FW_CFG_SETUP_ADDR, fixup_setup_data, NULL,
|
||||
fixup, &fixup->addr, sizeof(fixup->addr), true);
|
||||
qemu_register_reset(reset_setup_data, fixup);
|
||||
} else {
|
||||
fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr);
|
||||
}
|
||||
|
||||
fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr);
|
||||
fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size);
|
||||
fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size);
|
||||
sev_load_ctx.kernel_data = (char *)kernel;
|
||||
sev_load_ctx.kernel_size = kernel_size;
|
||||
|
||||
fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr);
|
||||
fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size);
|
||||
fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size);
|
||||
sev_load_ctx.setup_data = (char *)setup;
|
||||
@@ -1187,9 +1135,16 @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware,
|
||||
(bios_size % 65536) != 0) {
|
||||
goto bios_error;
|
||||
}
|
||||
|
||||
bios = g_malloc(sizeof(*bios));
|
||||
memory_region_init_ram(bios, NULL, "pc.bios", bios_size, &error_fatal);
|
||||
if (sev_enabled()) {
|
||||
if (is_tdx_vm()) {
|
||||
memory_region_init_ram_restricted(bios, NULL, "pc.bios", bios_size, &error_fatal);
|
||||
tdx_set_tdvf_region(bios);
|
||||
} else {
|
||||
memory_region_init_ram(bios, NULL, "pc.bios", bios_size, &error_fatal);
|
||||
}
|
||||
|
||||
if (sev_enabled() || is_tdx_vm()) {
|
||||
/*
|
||||
* The concept of a "reset" simply doesn't exist for
|
||||
* confidential computing guests, we have to destroy and
|
||||
@@ -1211,17 +1166,20 @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware,
|
||||
}
|
||||
g_free(filename);
|
||||
|
||||
/* map the last 128KB of the BIOS in ISA space */
|
||||
isa_bios_size = MIN(bios_size, 128 * KiB);
|
||||
isa_bios = g_malloc(sizeof(*isa_bios));
|
||||
memory_region_init_alias(isa_bios, NULL, "isa-bios", bios,
|
||||
bios_size - isa_bios_size, isa_bios_size);
|
||||
memory_region_add_subregion_overlap(rom_memory,
|
||||
0x100000 - isa_bios_size,
|
||||
isa_bios,
|
||||
1);
|
||||
if (!isapc_ram_fw) {
|
||||
memory_region_set_readonly(isa_bios, true);
|
||||
/* For TDX, alias different GPAs to same private memory is not supported */
|
||||
if (!is_tdx_vm()) {
|
||||
/* map the last 128KB of the BIOS in ISA space */
|
||||
isa_bios_size = MIN(bios_size, 128 * KiB);
|
||||
isa_bios = g_malloc(sizeof(*isa_bios));
|
||||
memory_region_init_alias(isa_bios, NULL, "isa-bios", bios,
|
||||
bios_size - isa_bios_size, isa_bios_size);
|
||||
memory_region_add_subregion_overlap(rom_memory,
|
||||
0x100000 - isa_bios_size,
|
||||
isa_bios,
|
||||
1);
|
||||
if (!isapc_ram_fw) {
|
||||
memory_region_set_readonly(isa_bios, true);
|
||||
}
|
||||
}
|
||||
|
||||
/* map all the bios at the top of memory */
|
||||
@@ -1419,6 +1377,11 @@ static void machine_set_sgx_epc(Object *obj, Visitor *v, const char *name,
|
||||
qapi_free_SgxEPCList(list);
|
||||
}
|
||||
|
||||
static int x86_kvm_type(MachineState *ms, const char *vm_type)
|
||||
{
|
||||
return kvm_get_vm_type(ms, vm_type);
|
||||
}
|
||||
|
||||
static void x86_machine_initfn(Object *obj)
|
||||
{
|
||||
X86MachineState *x86ms = X86_MACHINE(obj);
|
||||
@@ -1432,6 +1395,7 @@ static void x86_machine_initfn(Object *obj)
|
||||
x86ms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8);
|
||||
x86ms->bus_lock_ratelimit = 0;
|
||||
x86ms->above_4g_mem_start = 4 * GiB;
|
||||
x86ms->eoi_intercept_unsupported = false;
|
||||
}
|
||||
|
||||
static void x86_machine_class_init(ObjectClass *oc, void *data)
|
||||
@@ -1443,6 +1407,7 @@ static void x86_machine_class_init(ObjectClass *oc, void *data)
|
||||
mc->cpu_index_to_instance_props = x86_cpu_index_to_props;
|
||||
mc->get_default_cpu_node_id = x86_get_default_cpu_node_id;
|
||||
mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids;
|
||||
mc->kvm_type = x86_kvm_type;
|
||||
x86mc->save_tsc_khz = true;
|
||||
x86mc->fwcfg_dma_enabled = true;
|
||||
nc->nmi_monitor_handler = x86_nmi;
|
||||
|
@@ -262,6 +262,15 @@ void apic_designate_bsp(DeviceState *dev, bool bsp)
|
||||
}
|
||||
}
|
||||
|
||||
void apic_force_x2apic(DeviceState *dev)
|
||||
{
|
||||
if (dev == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
APIC_COMMON(dev)->force_x2apic = true;
|
||||
}
|
||||
|
||||
static void apic_reset_common(DeviceState *dev)
|
||||
{
|
||||
APICCommonState *s = APIC_COMMON(dev);
|
||||
@@ -270,6 +279,9 @@ static void apic_reset_common(DeviceState *dev)
|
||||
|
||||
bsp = s->apicbase & MSR_IA32_APICBASE_BSP;
|
||||
s->apicbase = APIC_DEFAULT_ADDRESS | bsp | MSR_IA32_APICBASE_ENABLE;
|
||||
if (s->force_x2apic) {
|
||||
s->apicbase |= MSR_IA32_APICBASE_EXTD;
|
||||
}
|
||||
s->id = s->initial_apic_id;
|
||||
|
||||
apic_reset_irq_delivered();
|
||||
|
@@ -404,14 +404,14 @@ static bool nvme_addr_is_pmr(NvmeCtrl *n, hwaddr addr)
|
||||
return false;
|
||||
}
|
||||
|
||||
hi = n->pmr.cba + int128_get64(n->pmr.dev->mr.size);
|
||||
hi = n->pmr.cba + int128_get64(n->pmr.dev->mr->size);
|
||||
|
||||
return addr >= n->pmr.cba && addr < hi;
|
||||
}
|
||||
|
||||
static inline void *nvme_addr_to_pmr(NvmeCtrl *n, hwaddr addr)
|
||||
{
|
||||
return memory_region_get_ram_ptr(&n->pmr.dev->mr) + (addr - n->pmr.cba);
|
||||
return memory_region_get_ram_ptr(n->pmr.dev->mr) + (addr - n->pmr.cba);
|
||||
}
|
||||
|
||||
static inline bool nvme_addr_is_iomem(NvmeCtrl *n, hwaddr addr)
|
||||
@@ -6317,7 +6317,7 @@ static void nvme_ctrl_shutdown(NvmeCtrl *n)
|
||||
int i;
|
||||
|
||||
if (n->pmr.dev) {
|
||||
memory_region_msync(&n->pmr.dev->mr, 0, n->pmr.dev->size);
|
||||
memory_region_msync(n->pmr.dev->mr, 0, n->pmr.dev->size);
|
||||
}
|
||||
|
||||
for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
|
||||
@@ -6645,10 +6645,10 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
|
||||
|
||||
stl_le_p(&n->bar.pmrctl, data);
|
||||
if (NVME_PMRCTL_EN(data)) {
|
||||
memory_region_set_enabled(&n->pmr.dev->mr, true);
|
||||
memory_region_set_enabled(n->pmr.dev->mr, true);
|
||||
pmrsts = 0;
|
||||
} else {
|
||||
memory_region_set_enabled(&n->pmr.dev->mr, false);
|
||||
memory_region_set_enabled(n->pmr.dev->mr, false);
|
||||
NVME_PMRSTS_SET_NRDY(pmrsts, 1);
|
||||
n->pmr.cmse = false;
|
||||
}
|
||||
@@ -6678,7 +6678,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
|
||||
uint64_t pmrmscu = ldl_le_p(&n->bar.pmrmscu);
|
||||
hwaddr cba = pmrmscu << 32 |
|
||||
(NVME_PMRMSCL_CBA(data) << PMRMSCL_CBA_SHIFT);
|
||||
if (cba + int128_get64(n->pmr.dev->mr.size) < cba) {
|
||||
if (cba + int128_get64(n->pmr.dev->mr->size) < cba) {
|
||||
NVME_PMRSTS_SET_CBAI(pmrsts, 1);
|
||||
stl_le_p(&n->bar.pmrsts, pmrsts);
|
||||
return;
|
||||
@@ -6745,7 +6745,7 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size)
|
||||
*/
|
||||
if (addr == NVME_REG_PMRSTS &&
|
||||
(NVME_PMRCAP_PMRWBM(ldl_le_p(&n->bar.pmrcap)) & 0x02)) {
|
||||
memory_region_msync(&n->pmr.dev->mr, 0, n->pmr.dev->size);
|
||||
memory_region_msync(n->pmr.dev->mr, 0, n->pmr.dev->size);
|
||||
}
|
||||
|
||||
return ldn_le_p(ptr + addr, size);
|
||||
@@ -7189,9 +7189,9 @@ static void nvme_init_pmr(NvmeCtrl *n, PCIDevice *pci_dev)
|
||||
pci_register_bar(pci_dev, NVME_PMR_BIR,
|
||||
PCI_BASE_ADDRESS_SPACE_MEMORY |
|
||||
PCI_BASE_ADDRESS_MEM_TYPE_64 |
|
||||
PCI_BASE_ADDRESS_MEM_PREFETCH, &n->pmr.dev->mr);
|
||||
PCI_BASE_ADDRESS_MEM_PREFETCH, n->pmr.dev->mr);
|
||||
|
||||
memory_region_set_enabled(&n->pmr.dev->mr, false);
|
||||
memory_region_set_enabled(n->pmr.dev->mr, false);
|
||||
}
|
||||
|
||||
static uint64_t nvme_bar_size(unsigned total_queues, unsigned total_irqs,
|
||||
|
@@ -375,7 +375,8 @@ static void fw_cfg_dma_transfer(FWCfgState *s)
|
||||
} else {
|
||||
dma.length = 0;
|
||||
}
|
||||
|
||||
trace_fw_cfg_transfer(dma.address, dma.length, arch,
|
||||
(s->cur_entry & FW_CFG_ENTRY_MASK), read, write);
|
||||
dma.control = 0;
|
||||
|
||||
while (dma.length > 0 && !(dma.control & FW_CFG_DMA_CTL_ERROR)) {
|
||||
@@ -693,12 +694,12 @@ static const VMStateDescription vmstate_fw_cfg = {
|
||||
}
|
||||
};
|
||||
|
||||
void fw_cfg_add_bytes_callback(FWCfgState *s, uint16_t key,
|
||||
FWCfgCallback select_cb,
|
||||
FWCfgWriteCallback write_cb,
|
||||
void *callback_opaque,
|
||||
void *data, size_t len,
|
||||
bool read_only)
|
||||
static void fw_cfg_add_bytes_callback(FWCfgState *s, uint16_t key,
|
||||
FWCfgCallback select_cb,
|
||||
FWCfgWriteCallback write_cb,
|
||||
void *callback_opaque,
|
||||
void *data, size_t len,
|
||||
bool read_only)
|
||||
{
|
||||
int arch = !!(key & FW_CFG_ARCH_LOCAL);
|
||||
|
||||
|
@@ -13,6 +13,7 @@ fw_cfg_add_string(uint16_t key_value, const char *key_name, const char *value) "
|
||||
fw_cfg_add_i16(uint16_t key_value, const char *key_name, uint16_t value) "key 0x%04" PRIx16 " '%s', value 0x%" PRIx16
|
||||
fw_cfg_add_i32(uint16_t key_value, const char *key_name, uint32_t value) "key 0x%04" PRIx16 " '%s', value 0x%" PRIx32
|
||||
fw_cfg_add_i64(uint16_t key_value, const char *key_name, uint64_t value) "key 0x%04" PRIx16 " '%s', value 0x%" PRIx64
|
||||
fw_cfg_transfer(uint64_t dma_addr, uint64_t length, int arch, uint64_t entry, int read, int write) "dma 0x%" PRIx64 " length 0x%" PRIx64 " arch %d entry 0x%" PRIx64 " read %d write %d"
|
||||
|
||||
# mac_nvram.c
|
||||
macio_nvram_read(uint32_t addr, uint8_t val) "read addr=0x%04"PRIx32" val=0x%02x"
|
||||
|
@@ -796,6 +796,12 @@ int pvrdma_exec_cmd(PVRDMADev *dev)
|
||||
|
||||
dsr_info = &dev->dsr_info;
|
||||
|
||||
if (!dsr_info->dsr) {
|
||||
/* Buggy or malicious guest driver */
|
||||
rdma_error_report("Exec command without dsr, req or rsp buffers");
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (dsr_info->req->hdr.cmd >= sizeof(cmd_handlers) /
|
||||
sizeof(struct cmd_handler)) {
|
||||
rdma_error_report("Unsupported command");
|
||||
|
@@ -1134,15 +1134,24 @@ static void lsi_execute_script(LSIState *s)
|
||||
uint32_t addr, addr_high;
|
||||
int opcode;
|
||||
int insn_processed = 0;
|
||||
static int reentrancy_level;
|
||||
|
||||
reentrancy_level++;
|
||||
|
||||
s->istat1 |= LSI_ISTAT1_SRUN;
|
||||
again:
|
||||
if (++insn_processed > LSI_MAX_INSN) {
|
||||
/* Some windows drivers make the device spin waiting for a memory
|
||||
location to change. If we have been executed a lot of code then
|
||||
assume this is the case and force an unexpected device disconnect.
|
||||
This is apparently sufficient to beat the drivers into submission.
|
||||
*/
|
||||
/*
|
||||
* Some windows drivers make the device spin waiting for a memory location
|
||||
* to change. If we have executed more than LSI_MAX_INSN instructions then
|
||||
* assume this is the case and force an unexpected device disconnect. This
|
||||
* is apparently sufficient to beat the drivers into submission.
|
||||
*
|
||||
* Another issue (CVE-2023-0330) can occur if the script is programmed to
|
||||
* trigger itself again and again. Avoid this problem by stopping after
|
||||
* being called multiple times in a reentrant way (8 is an arbitrary value
|
||||
* which should be enough for all valid use cases).
|
||||
*/
|
||||
if (++insn_processed > LSI_MAX_INSN || reentrancy_level > 8) {
|
||||
if (!(s->sien0 & LSI_SIST0_UDC)) {
|
||||
qemu_log_mask(LOG_GUEST_ERROR,
|
||||
"lsi_scsi: inf. loop with UDC masked");
|
||||
@@ -1596,6 +1605,8 @@ again:
|
||||
}
|
||||
}
|
||||
trace_lsi_execute_script_stop();
|
||||
|
||||
reentrancy_level--;
|
||||
}
|
||||
|
||||
static uint8_t lsi_reg_readb(LSIState *s, int offset)
|
||||
|
@@ -1927,7 +1927,7 @@ static void megasas_command_cancelled(SCSIRequest *req)
|
||||
{
|
||||
MegasasCmd *cmd = req->hba_private;
|
||||
|
||||
if (!cmd) {
|
||||
if (!cmd || !cmd->frame) {
|
||||
return;
|
||||
}
|
||||
cmd->frame->header.cmd_status = MFI_STAT_SCSI_IO_FAILED;
|
||||
|
@@ -322,11 +322,17 @@ static void scsi_read_complete(void * opaque, int ret)
|
||||
if (r->req.cmd.buf[0] == READ_CAPACITY_10 &&
|
||||
(ldl_be_p(&r->buf[0]) != 0xffffffffU || s->max_lba == 0)) {
|
||||
s->blocksize = ldl_be_p(&r->buf[4]);
|
||||
s->max_lba = ldl_be_p(&r->buf[0]) & 0xffffffffULL;
|
||||
BlockBackend *blk = s->conf.blk;
|
||||
BlockDriverState *bs = blk_bs(blk);
|
||||
s->max_lba = bs->total_sectors - 1;
|
||||
stl_be_p(&r->buf[0], s->max_lba);
|
||||
} else if (r->req.cmd.buf[0] == SERVICE_ACTION_IN_16 &&
|
||||
(r->req.cmd.buf[1] & 31) == SAI_READ_CAPACITY_16) {
|
||||
s->blocksize = ldl_be_p(&r->buf[8]);
|
||||
s->max_lba = ldq_be_p(&r->buf[0]);
|
||||
BlockBackend *blk = s->conf.blk;
|
||||
BlockDriverState *bs = blk_bs(blk);
|
||||
s->max_lba = bs->total_sectors - 1;
|
||||
stq_be_p(&r->buf[0], s->max_lba);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -391,7 +397,10 @@ static void scsi_write_complete(void * opaque, int ret)
|
||||
|
||||
aio_context_acquire(blk_get_aio_context(s->conf.blk));
|
||||
|
||||
if (ret || r->req.io_canceled) {
|
||||
if (ret || r->req.io_canceled ||
|
||||
r->io_header.status != SCSI_HOST_OK ||
|
||||
(r->io_header.driver_status & SG_ERR_DRIVER_TIMEOUT) ||
|
||||
r->io_header.status != GOOD) {
|
||||
scsi_command_complete_noio(r, ret);
|
||||
goto done;
|
||||
}
|
||||
|
@@ -749,14 +749,16 @@ static void smbios_build_type_4_table(MachineState *ms, unsigned instance)
|
||||
t->core_count = (ms->smp.cores > 255) ? 0xFF : ms->smp.cores;
|
||||
t->core_enabled = t->core_count;
|
||||
|
||||
t->core_count2 = t->core_enabled2 = cpu_to_le16(ms->smp.cores);
|
||||
|
||||
t->thread_count = (ms->smp.threads > 255) ? 0xFF : ms->smp.threads;
|
||||
t->thread_count2 = cpu_to_le16(ms->smp.threads);
|
||||
|
||||
t->processor_characteristics = cpu_to_le16(0x02); /* Unknown */
|
||||
t->processor_family2 = cpu_to_le16(0x01); /* Other */
|
||||
|
||||
if (tbl_len == SMBIOS_TYPE_4_LEN_V30) {
|
||||
t->core_count2 = t->core_enabled2 = cpu_to_le16(ms->smp.cores);
|
||||
t->thread_count2 = cpu_to_le16(ms->smp.threads);
|
||||
}
|
||||
|
||||
SMBIOS_BUILD_TABLE_POST;
|
||||
smbios_type4_count++;
|
||||
}
|
||||
|
@@ -633,6 +633,11 @@ virtio_crypto_sym_op_helper(VirtIODevice *vdev,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (unlikely(src_len != dst_len)) {
|
||||
virtio_error(vdev, "sym request src len is different from dst len");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
max_len = (uint64_t)iv_len + aad_len + src_len + dst_len + hash_result_len;
|
||||
if (unlikely(max_len > vcrypto->conf.max_size)) {
|
||||
virtio_error(vdev, "virtio-crypto too big length");
|
||||
|
@@ -448,7 +448,7 @@ static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa,
|
||||
uint64_t size, bool plug)
|
||||
{
|
||||
const uint64_t offset = start_gpa - vmem->addr;
|
||||
RAMBlock *rb = vmem->memdev->mr.ram_block;
|
||||
RAMBlock *rb = vmem->memdev->mr->ram_block;
|
||||
|
||||
if (virtio_mem_is_busy()) {
|
||||
return -EBUSY;
|
||||
@@ -463,8 +463,8 @@ static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa,
|
||||
int ret = 0;
|
||||
|
||||
if (vmem->prealloc) {
|
||||
void *area = memory_region_get_ram_ptr(&vmem->memdev->mr) + offset;
|
||||
int fd = memory_region_get_fd(&vmem->memdev->mr);
|
||||
void *area = memory_region_get_ram_ptr(vmem->memdev->mr) + offset;
|
||||
int fd = memory_region_get_fd(vmem->memdev->mr);
|
||||
Error *local_err = NULL;
|
||||
|
||||
qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err);
|
||||
@@ -490,7 +490,7 @@ static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa,
|
||||
|
||||
if (ret) {
|
||||
/* Could be preallocation or a notifier populated memory. */
|
||||
ram_block_discard_range(vmem->memdev->mr.ram_block, offset, size);
|
||||
ram_block_discard_range(vmem->memdev->mr->ram_block, offset, size);
|
||||
return -EBUSY;
|
||||
}
|
||||
}
|
||||
@@ -558,7 +558,7 @@ static void virtio_mem_resize_usable_region(VirtIOMEM *vmem,
|
||||
uint64_t requested_size,
|
||||
bool can_shrink)
|
||||
{
|
||||
uint64_t newsize = MIN(memory_region_size(&vmem->memdev->mr),
|
||||
uint64_t newsize = MIN(memory_region_size(vmem->memdev->mr),
|
||||
requested_size + VIRTIO_MEM_USABLE_EXTENT);
|
||||
|
||||
/* The usable region size always has to be multiples of the block size. */
|
||||
@@ -578,7 +578,7 @@ static void virtio_mem_resize_usable_region(VirtIOMEM *vmem,
|
||||
|
||||
static int virtio_mem_unplug_all(VirtIOMEM *vmem)
|
||||
{
|
||||
RAMBlock *rb = vmem->memdev->mr.ram_block;
|
||||
RAMBlock *rb = vmem->memdev->mr->ram_block;
|
||||
|
||||
if (virtio_mem_is_busy()) {
|
||||
return -EBUSY;
|
||||
@@ -705,7 +705,7 @@ static void virtio_mem_get_config(VirtIODevice *vdev, uint8_t *config_data)
|
||||
config->requested_size = cpu_to_le64(vmem->requested_size);
|
||||
config->plugged_size = cpu_to_le64(vmem->size);
|
||||
config->addr = cpu_to_le64(vmem->addr);
|
||||
config->region_size = cpu_to_le64(memory_region_size(&vmem->memdev->mr));
|
||||
config->region_size = cpu_to_le64(memory_region_size(vmem->memdev->mr));
|
||||
config->usable_region_size = cpu_to_le64(vmem->usable_region_size);
|
||||
}
|
||||
|
||||
@@ -766,9 +766,9 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
|
||||
VIRTIO_MEM_MEMDEV_PROP,
|
||||
object_get_canonical_path_component(OBJECT(vmem->memdev)));
|
||||
return;
|
||||
} else if (!memory_region_is_ram(&vmem->memdev->mr) ||
|
||||
memory_region_is_rom(&vmem->memdev->mr) ||
|
||||
!vmem->memdev->mr.ram_block) {
|
||||
} else if (!memory_region_is_ram(vmem->memdev->mr) ||
|
||||
memory_region_is_rom(vmem->memdev->mr) ||
|
||||
!vmem->memdev->mr->ram_block) {
|
||||
error_setg(errp, "'%s' property specifies an unsupported memdev",
|
||||
VIRTIO_MEM_MEMDEV_PROP);
|
||||
return;
|
||||
@@ -787,7 +787,7 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
|
||||
return;
|
||||
}
|
||||
|
||||
rb = vmem->memdev->mr.ram_block;
|
||||
rb = vmem->memdev->mr->ram_block;
|
||||
page_size = qemu_ram_pagesize(rb);
|
||||
|
||||
#if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS)
|
||||
@@ -841,7 +841,7 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
|
||||
")", VIRTIO_MEM_ADDR_PROP, VIRTIO_MEM_BLOCK_SIZE_PROP,
|
||||
vmem->block_size);
|
||||
return;
|
||||
} else if (!QEMU_IS_ALIGNED(memory_region_size(&vmem->memdev->mr),
|
||||
} else if (!QEMU_IS_ALIGNED(memory_region_size(vmem->memdev->mr),
|
||||
vmem->block_size)) {
|
||||
error_setg(errp, "'%s' property memdev size has to be multiples of"
|
||||
"'%s' (0x%" PRIx64 ")", VIRTIO_MEM_MEMDEV_PROP,
|
||||
@@ -863,7 +863,7 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
|
||||
|
||||
virtio_mem_resize_usable_region(vmem, vmem->requested_size, true);
|
||||
|
||||
vmem->bitmap_size = memory_region_size(&vmem->memdev->mr) /
|
||||
vmem->bitmap_size = memory_region_size(vmem->memdev->mr) /
|
||||
vmem->block_size;
|
||||
vmem->bitmap = bitmap_new(vmem->bitmap_size);
|
||||
|
||||
@@ -871,14 +871,14 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
|
||||
vmem->vq = virtio_add_queue(vdev, 128, virtio_mem_handle_request);
|
||||
|
||||
host_memory_backend_set_mapped(vmem->memdev, true);
|
||||
vmstate_register_ram(&vmem->memdev->mr, DEVICE(vmem));
|
||||
vmstate_register_ram(vmem->memdev->mr, DEVICE(vmem));
|
||||
qemu_register_reset(virtio_mem_system_reset, vmem);
|
||||
|
||||
/*
|
||||
* Set ourselves as RamDiscardManager before the plug handler maps the
|
||||
* memory region and exposes it via an address space.
|
||||
*/
|
||||
memory_region_set_ram_discard_manager(&vmem->memdev->mr,
|
||||
memory_region_set_ram_discard_manager(vmem->memdev->mr,
|
||||
RAM_DISCARD_MANAGER(vmem));
|
||||
}
|
||||
|
||||
@@ -891,9 +891,9 @@ static void virtio_mem_device_unrealize(DeviceState *dev)
|
||||
* The unplug handler unmapped the memory region, it cannot be
|
||||
* found via an address space anymore. Unset ourselves.
|
||||
*/
|
||||
memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL);
|
||||
memory_region_set_ram_discard_manager(vmem->memdev->mr, NULL);
|
||||
qemu_unregister_reset(virtio_mem_system_reset, vmem);
|
||||
vmstate_unregister_ram(&vmem->memdev->mr, DEVICE(vmem));
|
||||
vmstate_unregister_ram(vmem->memdev->mr, DEVICE(vmem));
|
||||
host_memory_backend_set_mapped(vmem->memdev, false);
|
||||
virtio_del_queue(vdev, 0);
|
||||
virtio_cleanup(vdev);
|
||||
@@ -904,7 +904,7 @@ static void virtio_mem_device_unrealize(DeviceState *dev)
|
||||
static int virtio_mem_discard_range_cb(const VirtIOMEM *vmem, void *arg,
|
||||
uint64_t offset, uint64_t size)
|
||||
{
|
||||
RAMBlock *rb = vmem->memdev->mr.ram_block;
|
||||
RAMBlock *rb = vmem->memdev->mr->ram_block;
|
||||
|
||||
return ram_block_discard_range(rb, offset, size) ? -EINVAL : 0;
|
||||
}
|
||||
@@ -955,7 +955,7 @@ static int virtio_mem_mig_sanity_checks_pre_save(void *opaque)
|
||||
VirtIOMEM *vmem = tmp->parent;
|
||||
|
||||
tmp->addr = vmem->addr;
|
||||
tmp->region_size = memory_region_size(&vmem->memdev->mr);
|
||||
tmp->region_size = memory_region_size(vmem->memdev->mr);
|
||||
tmp->block_size = vmem->block_size;
|
||||
tmp->node = vmem->node;
|
||||
return 0;
|
||||
@@ -965,7 +965,7 @@ static int virtio_mem_mig_sanity_checks_post_load(void *opaque, int version_id)
|
||||
{
|
||||
VirtIOMEMMigSanityChecks *tmp = opaque;
|
||||
VirtIOMEM *vmem = tmp->parent;
|
||||
const uint64_t new_region_size = memory_region_size(&vmem->memdev->mr);
|
||||
const uint64_t new_region_size = memory_region_size(vmem->memdev->mr);
|
||||
|
||||
if (tmp->addr != vmem->addr) {
|
||||
error_report("Property '%s' changed from 0x%" PRIx64 " to 0x%" PRIx64,
|
||||
@@ -1043,7 +1043,7 @@ static void virtio_mem_fill_device_info(const VirtIOMEM *vmem,
|
||||
vi->node = vmem->node;
|
||||
vi->requested_size = vmem->requested_size;
|
||||
vi->size = vmem->size;
|
||||
vi->max_size = memory_region_size(&vmem->memdev->mr);
|
||||
vi->max_size = memory_region_size(vmem->memdev->mr);
|
||||
vi->block_size = vmem->block_size;
|
||||
vi->memdev = object_get_canonical_path(OBJECT(vmem->memdev));
|
||||
}
|
||||
@@ -1055,7 +1055,7 @@ static MemoryRegion *virtio_mem_get_memory_region(VirtIOMEM *vmem, Error **errp)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return &vmem->memdev->mr;
|
||||
return vmem->memdev->mr;
|
||||
}
|
||||
|
||||
static void virtio_mem_add_size_change_notifier(VirtIOMEM *vmem,
|
||||
@@ -1113,10 +1113,10 @@ static void virtio_mem_set_requested_size(Object *obj, Visitor *v,
|
||||
")", name, VIRTIO_MEM_BLOCK_SIZE_PROP,
|
||||
vmem->block_size);
|
||||
return;
|
||||
} else if (value > memory_region_size(&vmem->memdev->mr)) {
|
||||
} else if (value > memory_region_size(vmem->memdev->mr)) {
|
||||
error_setg(errp, "'%s' cannot exceed the memory backend size"
|
||||
"(0x%" PRIx64 ")", name,
|
||||
memory_region_size(&vmem->memdev->mr));
|
||||
memory_region_size(vmem->memdev->mr));
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1145,8 +1145,8 @@ static void virtio_mem_get_block_size(Object *obj, Visitor *v, const char *name,
|
||||
* default block size we would use with the current memory backend.
|
||||
*/
|
||||
if (!value) {
|
||||
if (vmem->memdev && memory_region_is_ram(&vmem->memdev->mr)) {
|
||||
value = virtio_mem_default_block_size(vmem->memdev->mr.ram_block);
|
||||
if (vmem->memdev && memory_region_is_ram(vmem->memdev->mr)) {
|
||||
value = virtio_mem_default_block_size(vmem->memdev->mr->ram_block);
|
||||
} else {
|
||||
value = virtio_mem_thp_size();
|
||||
}
|
||||
@@ -1219,7 +1219,7 @@ static uint64_t virtio_mem_rdm_get_min_granularity(const RamDiscardManager *rdm,
|
||||
{
|
||||
const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
|
||||
|
||||
g_assert(mr == &vmem->memdev->mr);
|
||||
g_assert(mr == vmem->memdev->mr);
|
||||
return vmem->block_size;
|
||||
}
|
||||
|
||||
@@ -1230,7 +1230,7 @@ static bool virtio_mem_rdm_is_populated(const RamDiscardManager *rdm,
|
||||
uint64_t start_gpa = vmem->addr + s->offset_within_region;
|
||||
uint64_t end_gpa = start_gpa + int128_get64(s->size);
|
||||
|
||||
g_assert(s->mr == &vmem->memdev->mr);
|
||||
g_assert(s->mr == vmem->memdev->mr);
|
||||
|
||||
start_gpa = QEMU_ALIGN_DOWN(start_gpa, vmem->block_size);
|
||||
end_gpa = QEMU_ALIGN_UP(end_gpa, vmem->block_size);
|
||||
@@ -1265,7 +1265,7 @@ static int virtio_mem_rdm_replay_populated(const RamDiscardManager *rdm,
|
||||
.opaque = opaque,
|
||||
};
|
||||
|
||||
g_assert(s->mr == &vmem->memdev->mr);
|
||||
g_assert(s->mr == vmem->memdev->mr);
|
||||
return virtio_mem_for_each_plugged_section(vmem, s, &data,
|
||||
virtio_mem_rdm_replay_populated_cb);
|
||||
}
|
||||
@@ -1290,7 +1290,7 @@ static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm,
|
||||
.opaque = opaque,
|
||||
};
|
||||
|
||||
g_assert(s->mr == &vmem->memdev->mr);
|
||||
g_assert(s->mr == vmem->memdev->mr);
|
||||
virtio_mem_for_each_unplugged_section(vmem, s, &data,
|
||||
virtio_mem_rdm_replay_discarded_cb);
|
||||
}
|
||||
@@ -1302,7 +1302,7 @@ static void virtio_mem_rdm_register_listener(RamDiscardManager *rdm,
|
||||
VirtIOMEM *vmem = VIRTIO_MEM(rdm);
|
||||
int ret;
|
||||
|
||||
g_assert(s->mr == &vmem->memdev->mr);
|
||||
g_assert(s->mr == vmem->memdev->mr);
|
||||
rdl->section = memory_region_section_new_copy(s);
|
||||
|
||||
QLIST_INSERT_HEAD(&vmem->rdl_list, rdl, next);
|
||||
@@ -1319,7 +1319,7 @@ static void virtio_mem_rdm_unregister_listener(RamDiscardManager *rdm,
|
||||
{
|
||||
VirtIOMEM *vmem = VIRTIO_MEM(rdm);
|
||||
|
||||
g_assert(rdl->section->mr == &vmem->memdev->mr);
|
||||
g_assert(rdl->section->mr == vmem->memdev->mr);
|
||||
if (vmem->size) {
|
||||
if (rdl->double_discard_supported) {
|
||||
rdl->notify_discard(rdl, rdl->section);
|
||||
|
@@ -84,7 +84,7 @@ static void virtio_pmem_flush(VirtIODevice *vdev, VirtQueue *vq)
|
||||
g_free(req_data);
|
||||
return;
|
||||
}
|
||||
req_data->fd = memory_region_get_fd(&backend->mr);
|
||||
req_data->fd = memory_region_get_fd(backend->mr);
|
||||
req_data->pmem = pmem;
|
||||
req_data->vdev = vdev;
|
||||
thread_pool_submit_aio(pool, worker_cb, req_data, done_cb, req_data);
|
||||
@@ -96,7 +96,7 @@ static void virtio_pmem_get_config(VirtIODevice *vdev, uint8_t *config)
|
||||
struct virtio_pmem_config *pmemcfg = (struct virtio_pmem_config *) config;
|
||||
|
||||
virtio_stq_p(vdev, &pmemcfg->start, pmem->start);
|
||||
virtio_stq_p(vdev, &pmemcfg->size, memory_region_size(&pmem->memdev->mr));
|
||||
virtio_stq_p(vdev, &pmemcfg->size, memory_region_size(pmem->memdev->mr));
|
||||
}
|
||||
|
||||
static uint64_t virtio_pmem_get_features(VirtIODevice *vdev, uint64_t features,
|
||||
@@ -140,7 +140,7 @@ static void virtio_pmem_fill_device_info(const VirtIOPMEM *pmem,
|
||||
VirtioPMEMDeviceInfo *vi)
|
||||
{
|
||||
vi->memaddr = pmem->start;
|
||||
vi->size = memory_region_size(&pmem->memdev->mr);
|
||||
vi->size = memory_region_size(pmem->memdev->mr);
|
||||
vi->memdev = object_get_canonical_path(OBJECT(pmem->memdev));
|
||||
}
|
||||
|
||||
@@ -152,7 +152,7 @@ static MemoryRegion *virtio_pmem_get_memory_region(VirtIOPMEM *pmem,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return &pmem->memdev->mr;
|
||||
return pmem->memdev->mr;
|
||||
}
|
||||
|
||||
static Property virtio_pmem_properties[] = {
|
||||
|
@@ -51,12 +51,16 @@ struct ConfidentialGuestSupport {
|
||||
* so 'ready' is not set, we'll abort.
|
||||
*/
|
||||
bool ready;
|
||||
|
||||
bool disable_pv_clock;
|
||||
};
|
||||
|
||||
typedef struct ConfidentialGuestSupportClass {
|
||||
ObjectClass parent;
|
||||
} ConfidentialGuestSupportClass;
|
||||
|
||||
#define CONFIDENTIAL_GUEST_SUPPORT_DISABLE_PV_CLOCK "disable-pv-clock"
|
||||
|
||||
#endif /* !CONFIG_USER_ONLY */
|
||||
|
||||
#endif /* QEMU_CONFIDENTIAL_GUEST_SUPPORT_H */
|
||||
|
@@ -121,6 +121,8 @@ void cpu_address_space_init(CPUState *cpu, int asidx,
|
||||
|
||||
void cpu_physical_memory_rw(hwaddr addr, void *buf,
|
||||
hwaddr len, bool is_write);
|
||||
void cpu_physical_memory_rw_debug(hwaddr addr, void *buf,
|
||||
hwaddr len, bool is_write);
|
||||
static inline void cpu_physical_memory_read(hwaddr addr,
|
||||
void *buf, hwaddr len)
|
||||
{
|
||||
@@ -131,7 +133,20 @@ static inline void cpu_physical_memory_write(hwaddr addr,
|
||||
{
|
||||
cpu_physical_memory_rw(addr, (void *)buf, len, true);
|
||||
}
|
||||
|
||||
void cpu_reloading_memory_map(void);
|
||||
|
||||
static inline void cpu_physical_memory_read_debug(hwaddr addr,
|
||||
void *buf, hwaddr len)
|
||||
{
|
||||
cpu_physical_memory_rw_debug(addr, buf, len, false);
|
||||
}
|
||||
static inline void cpu_physical_memory_write_debug(hwaddr addr,
|
||||
const void *buf, hwaddr len)
|
||||
{
|
||||
cpu_physical_memory_rw_debug(addr, (void *)buf, len, true);
|
||||
}
|
||||
|
||||
void *cpu_physical_memory_map(hwaddr addr,
|
||||
hwaddr *plen,
|
||||
bool is_write);
|
||||
@@ -155,6 +170,8 @@ typedef int (RAMBlockIterFunc)(RAMBlock *rb, void *opaque);
|
||||
|
||||
int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque);
|
||||
int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length);
|
||||
int ram_block_convert_range(RAMBlock *rb, uint64_t start, size_t length,
|
||||
bool shared_to_private);
|
||||
|
||||
#endif
|
||||
|
||||
|
@@ -57,6 +57,8 @@ typedef struct MemTxAttrs {
|
||||
unsigned int target_tlb_bit0 : 1;
|
||||
unsigned int target_tlb_bit1 : 1;
|
||||
unsigned int target_tlb_bit2 : 1;
|
||||
/* Memory access request from the debugger */
|
||||
unsigned int debug:1;
|
||||
} MemTxAttrs;
|
||||
|
||||
/* Bus masters which don't specify any attributes will get this,
|
||||
@@ -65,6 +67,7 @@ typedef struct MemTxAttrs {
|
||||
* from "didn't specify" if necessary).
|
||||
*/
|
||||
#define MEMTXATTRS_UNSPECIFIED ((MemTxAttrs) { .unspecified = 1 })
|
||||
#define MEMTXATTRS_UNSPECIFIED_DEBUG ((MemTxAttrs) { .unspecified = 1, .debug = 1 })
|
||||
|
||||
/* New-style MMIO accessors can indicate that the transaction failed.
|
||||
* A zero (MEMTX_OK) response means success; anything else is a failure
|
||||
|
@@ -720,6 +720,19 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
|
||||
typedef struct CoalescedMemoryRange CoalescedMemoryRange;
|
||||
typedef struct MemoryRegionIoeventfd MemoryRegionIoeventfd;
|
||||
|
||||
/* Memory Region RAM debug callback */
|
||||
typedef struct MemoryRegionRAMReadWriteOps MemoryRegionRAMReadWriteOps;
|
||||
|
||||
struct MemoryRegionRAMReadWriteOps {
|
||||
/* Write data into guest memory */
|
||||
int (*write) (uint8_t *hva_dest, hwaddr gpa_des,
|
||||
const uint8_t *src, uint32_t len, MemTxAttrs attrs);
|
||||
/* Read data from guest memory */
|
||||
int (*read) (uint8_t *dest,
|
||||
const uint8_t *hva_src, hwaddr gpa_src,
|
||||
uint32_t len, MemTxAttrs attrs);
|
||||
};
|
||||
|
||||
/** MemoryRegion:
|
||||
*
|
||||
* A struct representing a memory region.
|
||||
@@ -741,6 +754,8 @@ struct MemoryRegion {
|
||||
bool is_iommu;
|
||||
RAMBlock *ram_block;
|
||||
Object *owner;
|
||||
/* owner as TYPE_DEVICE. Used for re-entrancy checks in MR access hotpath */
|
||||
DeviceState *dev;
|
||||
|
||||
const MemoryRegionOps *ops;
|
||||
void *opaque;
|
||||
@@ -765,6 +780,10 @@ struct MemoryRegion {
|
||||
unsigned ioeventfd_nb;
|
||||
MemoryRegionIoeventfd *ioeventfds;
|
||||
RamDiscardManager *rdm; /* Only for RAM */
|
||||
const MemoryRegionRAMReadWriteOps *ram_debug_ops;
|
||||
|
||||
/* For devices designed to perform re-entrant IO into their own IO MRs */
|
||||
bool disable_reentrancy_guard;
|
||||
};
|
||||
|
||||
struct IOMMUMemoryRegion {
|
||||
@@ -1012,6 +1031,19 @@ struct MemoryListener {
|
||||
*/
|
||||
void (*coalesced_io_del)(MemoryListener *listener, MemoryRegionSection *section,
|
||||
hwaddr addr, hwaddr len);
|
||||
|
||||
/**
|
||||
* @convert_mem_attr:
|
||||
*
|
||||
* Called during the memory attribute conversion.
|
||||
*
|
||||
* @listener: The #MemoryListener
|
||||
* @section: The MemoryRegionSection
|
||||
* @shared: convert memory attrubute from private to shared
|
||||
*/
|
||||
void (*convert_mem_attr)(MemoryListener *listener, MemoryRegionSection *section,
|
||||
bool shared);
|
||||
|
||||
/**
|
||||
* @priority:
|
||||
*
|
||||
@@ -1333,6 +1365,17 @@ void memory_region_init_ram_from_fd(MemoryRegion *mr,
|
||||
int fd,
|
||||
ram_addr_t offset,
|
||||
Error **errp);
|
||||
|
||||
/**
|
||||
* memory_region_set_restricted_fd: Set RAM memory region with a restricted fd.
|
||||
*
|
||||
* @mr: the #MemoryRegion to be set.
|
||||
* @fd: the fd to provide restricted memory.
|
||||
*
|
||||
*/
|
||||
|
||||
void memory_region_set_restricted_fd(MemoryRegion *mr, int fd);
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
@@ -1427,6 +1470,44 @@ void memory_region_init_rom_nomigrate(MemoryRegion *mr,
|
||||
uint64_t size,
|
||||
Error **errp);
|
||||
|
||||
/**
|
||||
* memory_region_set_ram_debug_ops: Set access ops for a give memory region.
|
||||
*
|
||||
* @mr: the #MemoryRegion to be initialized
|
||||
* @ops: a function that will be used when accessing @target region during
|
||||
* debug
|
||||
*/
|
||||
static inline void
|
||||
memory_region_set_ram_debug_ops(MemoryRegion *mr,
|
||||
const MemoryRegionRAMReadWriteOps *ops)
|
||||
{
|
||||
mr->ram_debug_ops = ops;
|
||||
}
|
||||
|
||||
/**
|
||||
* memory_region_ram_debug_ops_read_available: check if ram_debug_ops->read
|
||||
* is available
|
||||
*
|
||||
* @mr: the #MemoryRegion to be checked
|
||||
*/
|
||||
static inline bool
|
||||
memory_region_ram_debug_ops_read_available(MemoryRegion *mr)
|
||||
{
|
||||
return mr->ram_debug_ops && mr->ram_debug_ops->read;
|
||||
}
|
||||
|
||||
/**
|
||||
* memory_region_ram_debug_ops_write_available: check if ram_debug_ops->write
|
||||
* is available
|
||||
*
|
||||
* @mr: the #MemoryRegion to be checked
|
||||
*/
|
||||
static inline bool
|
||||
memory_region_ram_debug_ops_write_available(MemoryRegion *mr)
|
||||
{
|
||||
return mr->ram_debug_ops && mr->ram_debug_ops->write;
|
||||
}
|
||||
|
||||
/**
|
||||
* memory_region_init_rom_device_nomigrate: Initialize a ROM memory region.
|
||||
* Writes are handled via callbacks.
|
||||
@@ -1509,6 +1590,12 @@ void memory_region_init_ram(MemoryRegion *mr,
|
||||
uint64_t size,
|
||||
Error **errp);
|
||||
|
||||
void memory_region_init_ram_restricted(MemoryRegion *mr,
|
||||
Object *owner,
|
||||
const char *name,
|
||||
uint64_t size,
|
||||
Error **errp);
|
||||
|
||||
/**
|
||||
* memory_region_init_rom: Initialize a ROM memory region.
|
||||
*
|
||||
@@ -1628,6 +1715,16 @@ static inline bool memory_region_is_romd(MemoryRegion *mr)
|
||||
*/
|
||||
bool memory_region_is_protected(MemoryRegion *mr);
|
||||
|
||||
/**
|
||||
* memory_region_can_be_private: check whether a memory region has
|
||||
* restricted memfd
|
||||
*
|
||||
* Returns %true if a memory region's ram_block has restricted_fd assigned.
|
||||
*
|
||||
* @mr: the memory region being queried
|
||||
*/
|
||||
bool memory_region_can_be_private(MemoryRegion *mr);
|
||||
|
||||
/**
|
||||
* memory_region_get_iommu: check whether a memory region is an iommu
|
||||
*
|
||||
@@ -2384,6 +2481,14 @@ MemoryRegionSection memory_region_find(MemoryRegion *mr,
|
||||
*/
|
||||
void memory_global_dirty_log_sync(void);
|
||||
|
||||
/**
|
||||
* memory_region_convert_mem_attr: convert the memory attribute
|
||||
* @section: the #MemoryRegionSection to be converted
|
||||
* @shared: if true, convert attribute from private to shared;
|
||||
* if false, convert from shared to private
|
||||
*/
|
||||
void memory_region_convert_mem_attr(MemoryRegionSection *section, bool shared);
|
||||
|
||||
/**
|
||||
* memory_global_dirty_log_sync: synchronize the dirty log for all memory
|
||||
*
|
||||
@@ -2565,6 +2670,12 @@ MemTxResult address_space_write_rom(AddressSpace *as, hwaddr addr,
|
||||
MemTxAttrs attrs,
|
||||
const void *buf, hwaddr len);
|
||||
|
||||
MemTxResult address_space_write_rom_debug(AddressSpace *as,
|
||||
hwaddr addr,
|
||||
MemTxAttrs attrs,
|
||||
const void *ptr,
|
||||
hwaddr len);
|
||||
|
||||
/* address_space_ld*: load from an address space
|
||||
* address_space_st*: store to an address space
|
||||
*
|
||||
@@ -2820,6 +2931,23 @@ MemTxResult address_space_write_cached_slow(MemoryRegionCache *cache,
|
||||
int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr);
|
||||
bool prepare_mmio_access(MemoryRegion *mr);
|
||||
|
||||
typedef struct MemoryDebugOps {
|
||||
MemTxResult (*read)(AddressSpace *as, hwaddr phys_addr,
|
||||
MemTxAttrs attrs, void *buf,
|
||||
hwaddr len);
|
||||
MemTxResult (*write)(AddressSpace *as, hwaddr phys_addr,
|
||||
MemTxAttrs attrs, const void *buf,
|
||||
hwaddr len);
|
||||
} MemoryDebugOps;
|
||||
|
||||
void address_space_set_debug_ops(const MemoryDebugOps *ops);
|
||||
|
||||
/* Export for functions in target/i386/helper.c */
|
||||
extern const MemoryDebugOps *physical_memory_debug_ops;
|
||||
|
||||
|
||||
void set_encrypted_memory_debug_ops(void);
|
||||
|
||||
static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
|
||||
{
|
||||
if (is_write) {
|
||||
@@ -2875,6 +3003,10 @@ MemTxResult address_space_read(AddressSpace *as, hwaddr addr,
|
||||
return result;
|
||||
}
|
||||
|
||||
MemTxResult address_space_read_debug(AddressSpace *as, hwaddr addr,
|
||||
MemTxAttrs attrs, void *buf,
|
||||
hwaddr len);
|
||||
|
||||
/**
|
||||
* address_space_read_cached: read from a cached RAM region
|
||||
*
|
||||
@@ -3009,6 +3141,8 @@ bool ram_block_discard_is_disabled(void);
|
||||
*/
|
||||
bool ram_block_discard_is_required(void);
|
||||
|
||||
void ram_block_alloc_cgs_bitmap(RAMBlock *rb);
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@@ -40,6 +40,7 @@ struct RAMBlock {
|
||||
QLIST_ENTRY(RAMBlock) next;
|
||||
QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers;
|
||||
int fd;
|
||||
int restricted_fd;
|
||||
size_t page_size;
|
||||
/* dirty bitmap used during migration */
|
||||
unsigned long *bmap;
|
||||
@@ -65,6 +66,12 @@ struct RAMBlock {
|
||||
unsigned long *clear_bmap;
|
||||
uint8_t clear_bmap_shift;
|
||||
|
||||
/*
|
||||
* Bitmap for a confidential guest to record if a page is private (1) or
|
||||
* shared (0).
|
||||
*/
|
||||
unsigned long *cgs_bmap;
|
||||
|
||||
/*
|
||||
* RAM block length that corresponds to the used_length on the migration
|
||||
* source (after RAM block sizes were synchronized). Especially, after
|
||||
|
@@ -19,6 +19,7 @@ void apic_init_reset(DeviceState *s);
|
||||
void apic_sipi(DeviceState *s);
|
||||
void apic_poll_irq(DeviceState *d);
|
||||
void apic_designate_bsp(DeviceState *d, bool bsp);
|
||||
void apic_force_x2apic(DeviceState *d);
|
||||
int apic_get_highest_priority_irr(DeviceState *dev);
|
||||
|
||||
/* pc.c */
|
||||
|
@@ -187,6 +187,7 @@ struct APICCommonState {
|
||||
DeviceState *vapic;
|
||||
hwaddr vapic_paddr; /* note: persistence via kvmvapic */
|
||||
bool legacy_instance_id;
|
||||
bool force_x2apic;
|
||||
};
|
||||
|
||||
typedef struct VAPICState {
|
||||
|
@@ -128,9 +128,6 @@ struct PCMachineClass {
|
||||
|
||||
/* create kvmclock device even when KVM PV features are not exposed */
|
||||
bool kvmclock_create_always;
|
||||
|
||||
/* skip passing an rng seed for legacy machines */
|
||||
bool legacy_no_rng_seed;
|
||||
};
|
||||
|
||||
#define TYPE_PC_MACHINE "generic-pc-machine"
|
||||
|
63
include/hw/i386/tdvf.h
Normal file
63
include/hw/i386/tdvf.h
Normal file
@@ -0,0 +1,63 @@
|
||||
/*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
* Copyright (c) 2020 Intel Corporation
|
||||
* Author: Isaku Yamahata <isaku.yamahata at gmail.com>
|
||||
* <isaku.yamahata at intel.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with this program; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef HW_I386_TDVF_H
|
||||
#define HW_I386_TDVF_H
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
|
||||
#define TDVF_SECTION_TYPE_BFV 0
|
||||
#define TDVF_SECTION_TYPE_CFV 1
|
||||
#define TDVF_SECTION_TYPE_TD_HOB 2
|
||||
#define TDVF_SECTION_TYPE_TEMP_MEM 3
|
||||
#define TDVF_SECTION_TYPE_PERM_MEM 4
|
||||
#define TDVF_SECTION_TYPE_PAYLOAD 5
|
||||
|
||||
#define TDVF_SECTION_ATTRIBUTES_MR_EXTEND (1U << 0)
|
||||
#define TDVF_SECTION_ATTRIBUTES_PAGE_AUG (1U << 1)
|
||||
|
||||
typedef struct TdxFirmwareEntry {
|
||||
uint32_t data_offset;
|
||||
uint32_t data_len;
|
||||
uint64_t address;
|
||||
uint64_t size;
|
||||
uint32_t type;
|
||||
uint32_t attributes;
|
||||
|
||||
void *mem_ptr;
|
||||
} TdxFirmwareEntry;
|
||||
|
||||
typedef struct TdxFirmware {
|
||||
void *mem_ptr;
|
||||
|
||||
uint32_t nr_entries;
|
||||
TdxFirmwareEntry *entries;
|
||||
|
||||
/* For compatibility */
|
||||
bool guid_found;
|
||||
} TdxFirmware;
|
||||
|
||||
#define for_each_tdx_fw_entry(fw, e) \
|
||||
for (e = (fw)->entries; e != (fw)->entries + (fw)->nr_entries; e++)
|
||||
|
||||
int tdvf_parse_metadata(TdxFirmware *fw, void *flash_ptr, int size);
|
||||
|
||||
#endif /* HW_I386_TDVF_H */
|
@@ -61,6 +61,7 @@ struct X86MachineState {
|
||||
|
||||
/* CPU and apic information: */
|
||||
bool apic_xrupt_override;
|
||||
bool eoi_intercept_unsupported;
|
||||
unsigned pci_irq_mask;
|
||||
unsigned apic_id_limit;
|
||||
uint16_t boot_cpus;
|
||||
@@ -126,8 +127,7 @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware,
|
||||
void x86_load_linux(X86MachineState *x86ms,
|
||||
FWCfgState *fw_cfg,
|
||||
int acpi_data_size,
|
||||
bool pvh_enabled,
|
||||
bool legacy_no_rng_seed);
|
||||
bool pvh_enabled);
|
||||
|
||||
bool x86_machine_is_smm_enabled(const X86MachineState *x86ms);
|
||||
bool x86_machine_is_acpi_enabled(const X86MachineState *x86ms);
|
||||
|
@@ -117,28 +117,6 @@ struct FWCfgMemState {
|
||||
*/
|
||||
void fw_cfg_add_bytes(FWCfgState *s, uint16_t key, void *data, size_t len);
|
||||
|
||||
/**
|
||||
* fw_cfg_add_bytes_callback:
|
||||
* @s: fw_cfg device being modified
|
||||
* @key: selector key value for new fw_cfg item
|
||||
* @select_cb: callback function when selecting
|
||||
* @write_cb: callback function after a write
|
||||
* @callback_opaque: argument to be passed into callback function
|
||||
* @data: pointer to start of item data
|
||||
* @len: size of item data
|
||||
* @read_only: is file read only
|
||||
*
|
||||
* Add a new fw_cfg item, available by selecting the given key, as a raw
|
||||
* "blob" of the given size. The data referenced by the starting pointer
|
||||
* is only linked, NOT copied, into the data structure of the fw_cfg device.
|
||||
*/
|
||||
void fw_cfg_add_bytes_callback(FWCfgState *s, uint16_t key,
|
||||
FWCfgCallback select_cb,
|
||||
FWCfgWriteCallback write_cb,
|
||||
void *callback_opaque,
|
||||
void *data, size_t len,
|
||||
bool read_only);
|
||||
|
||||
/**
|
||||
* fw_cfg_add_string:
|
||||
* @s: fw_cfg device being modified
|
||||
|
@@ -162,6 +162,10 @@ struct NamedClockList {
|
||||
QLIST_ENTRY(NamedClockList) node;
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
bool engaged_in_io;
|
||||
} MemReentrancyGuard;
|
||||
|
||||
/**
|
||||
* DeviceState:
|
||||
* @realized: Indicates whether the device has been fully constructed.
|
||||
@@ -194,6 +198,9 @@ struct DeviceState {
|
||||
int alias_required_for_version;
|
||||
ResettableState reset;
|
||||
GSList *unplug_blockers;
|
||||
|
||||
/* Is the device currently in mmio/pio/dma? Used to prevent re-entrancy */
|
||||
MemReentrancyGuard mem_reentrancy_guard;
|
||||
};
|
||||
|
||||
struct DeviceListener {
|
||||
|
@@ -49,6 +49,10 @@ struct QIOChannelSocket {
|
||||
socklen_t remoteAddrLen;
|
||||
ssize_t zero_copy_queued;
|
||||
ssize_t zero_copy_sent;
|
||||
|
||||
struct UnixSocketAddress sendtoDgramAddr;
|
||||
struct UnixSocketAddress recvfromDgramAddr;
|
||||
bool unix_datagram;
|
||||
};
|
||||
|
||||
|
||||
@@ -262,4 +266,10 @@ qio_channel_socket_accept(QIOChannelSocket *ioc,
|
||||
Error **errp);
|
||||
|
||||
|
||||
void qio_channel_socket_set_dgram_send_address(QIOChannelSocket *ioc,
|
||||
const struct UnixSocketAddress *un_addr);
|
||||
|
||||
void qio_channel_socket_get_dgram_recv_address(QIOChannelSocket *ioc,
|
||||
struct UnixSocketAddress *un_addr);
|
||||
|
||||
#endif /* QIO_CHANNEL_SOCKET_H */
|
||||
|
@@ -48,6 +48,7 @@ struct QIOChannelTLS {
|
||||
QIOChannel *master;
|
||||
QCryptoTLSSession *session;
|
||||
QIOChannelShutdown shutdown;
|
||||
guint hs_ioc_tag;
|
||||
};
|
||||
|
||||
/**
|
||||
|
@@ -27,6 +27,7 @@ typedef struct SaveVMHandlers {
|
||||
/* This runs both outside and inside the iothread lock. */
|
||||
bool (*is_active)(void *opaque);
|
||||
bool (*has_postcopy)(void *opaque);
|
||||
int (*prepare_postcopy)(QEMUFile *f, void *opaque);
|
||||
|
||||
/* is_active_iterate
|
||||
* If it is not NULL then qemu_savevm_state_iterate will skip iteration if
|
||||
|
@@ -267,5 +267,4 @@ void qemu_hexdump_line(char *line, unsigned int b, const void *bufptr,
|
||||
|
||||
void qemu_hexdump(FILE *fp, const char *prefix,
|
||||
const void *bufptr, size_t size);
|
||||
|
||||
#endif
|
||||
|
@@ -32,12 +32,28 @@
|
||||
#define MFD_HUGE_SHIFT 26
|
||||
#endif
|
||||
|
||||
#ifndef RMFD_USERMNT
|
||||
#define RMFD_USERMNT 0x0001U
|
||||
#endif
|
||||
|
||||
#ifndef OPEN_TREE_CLOEXEC
|
||||
#define OPEN_TREE_CLOEXEC O_CLOEXEC
|
||||
#endif
|
||||
|
||||
int open_tree(int dirfd, const char *pathname, unsigned int flags);
|
||||
|
||||
#if defined CONFIG_LINUX && !defined CONFIG_MEMFD
|
||||
int memfd_create(const char *name, unsigned int flags);
|
||||
#endif
|
||||
|
||||
#if defined CONFIG_LINUX && !defined CONFIG_MEMFD_RESTRICTED
|
||||
int memfd_restricted(unsigned int flags, int mount_fd);
|
||||
#endif
|
||||
|
||||
int qemu_memfd_create(const char *name, size_t size, bool hugetlb,
|
||||
uint64_t hugetlbsize, unsigned int seals, Error **errp);
|
||||
int qemu_memfd_restricted(size_t size, unsigned int flags, int mount_fd, Error **errp);
|
||||
|
||||
bool qemu_memfd_alloc_check(void);
|
||||
void *qemu_memfd_alloc(const char *name, size_t size, unsigned int seals,
|
||||
int *fd, Error **errp);
|
||||
|
@@ -177,7 +177,7 @@ extern "C" {
|
||||
* supports QEMU_ERROR, this will be reported at compile time; otherwise
|
||||
* this will be reported at link time due to the missing symbol.
|
||||
*/
|
||||
extern G_NORETURN
|
||||
G_NORETURN extern
|
||||
void QEMU_ERROR("code path is reachable")
|
||||
qemu_build_not_reached_always(void);
|
||||
#if defined(__OPTIMIZE__) && !defined(__NO_INLINE__)
|
||||
|
@@ -77,7 +77,9 @@ int socket_connect(SocketAddress *addr, Error **errp);
|
||||
int socket_listen(SocketAddress *addr, int num, Error **errp);
|
||||
void socket_listen_cleanup(int fd, Error **errp);
|
||||
int socket_dgram(SocketAddress *remote, SocketAddress *local, Error **errp);
|
||||
|
||||
int prepare_unix_sockaddr(UnixSocketAddress *saddr,
|
||||
struct sockaddr_un *un, size_t *addrlen,
|
||||
Error **errp);
|
||||
/* Old, ipv4 only bits. Don't use for new code. */
|
||||
int convert_host_port(struct sockaddr_in *saddr, const char *host,
|
||||
const char *port, Error **errp);
|
||||
@@ -155,4 +157,12 @@ SocketAddress *socket_address_flatten(SocketAddressLegacy *addr);
|
||||
* Return 0 on success.
|
||||
*/
|
||||
int socket_address_parse_named_fd(SocketAddress *addr, Error **errp);
|
||||
|
||||
/**
|
||||
* sockaddr_is_abstract:
|
||||
*
|
||||
* Return false on sock_un is not abstract or NULL, othewise true.
|
||||
*/
|
||||
bool sockaddr_is_abstract(struct sockaddr_un *sock_un);
|
||||
|
||||
#endif /* QEMU_SOCKETS_H */
|
||||
|
@@ -1887,6 +1887,23 @@ ObjectProperty *object_property_add_alias(Object *obj, const char *name,
|
||||
ObjectProperty *object_property_add_const_link(Object *obj, const char *name,
|
||||
Object *target);
|
||||
|
||||
|
||||
/**
|
||||
* object_property_add_sha384:
|
||||
* @obj: the object to add a property to
|
||||
* @name: the name of the property
|
||||
* @v: pointer to value
|
||||
* @flags: bitwise-or'd ObjectPropertyFlags
|
||||
*
|
||||
* Add an sha384 property in memory. This function will add a
|
||||
* property of type 'sha384'.
|
||||
*
|
||||
* Returns: The newly added property on success, or %NULL on failure.
|
||||
*/
|
||||
ObjectProperty * object_property_add_sha384(Object *obj, const char *name,
|
||||
const uint8_t *v,
|
||||
ObjectPropertyFlags flags);
|
||||
|
||||
/**
|
||||
* object_property_set_description:
|
||||
* @obj: the object owning the property
|
||||
|
198
include/standard-headers/uefi/uefi.h
Normal file
198
include/standard-headers/uefi/uefi.h
Normal file
@@ -0,0 +1,198 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
*
|
||||
* Author: Isaku Yamahata <isaku.yamahata at gmail.com>
|
||||
* <isaku.yamahata at intel.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with this program; if not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef HW_I386_UEFI_H
|
||||
#define HW_I386_UEFI_H
|
||||
|
||||
/***************************************************************************/
|
||||
/*
|
||||
* basic EFI definitions
|
||||
* supplemented with UEFI Specification Version 2.8 (Errata A)
|
||||
* released February 2020
|
||||
*/
|
||||
/* UEFI integer is little endian */
|
||||
|
||||
typedef struct {
|
||||
uint32_t Data1;
|
||||
uint16_t Data2;
|
||||
uint16_t Data3;
|
||||
uint8_t Data4[8];
|
||||
} EFI_GUID;
|
||||
|
||||
typedef enum {
|
||||
EfiReservedMemoryType,
|
||||
EfiLoaderCode,
|
||||
EfiLoaderData,
|
||||
EfiBootServicesCode,
|
||||
EfiBootServicesData,
|
||||
EfiRuntimeServicesCode,
|
||||
EfiRuntimeServicesData,
|
||||
EfiConventionalMemory,
|
||||
EfiUnusableMemory,
|
||||
EfiACPIReclaimMemory,
|
||||
EfiACPIMemoryNVS,
|
||||
EfiMemoryMappedIO,
|
||||
EfiMemoryMappedIOPortSpace,
|
||||
EfiPalCode,
|
||||
EfiPersistentMemory,
|
||||
EfiUnacceptedMemoryType,
|
||||
EfiMaxMemoryType
|
||||
} EFI_MEMORY_TYPE;
|
||||
|
||||
#define EFI_HOB_HANDOFF_TABLE_VERSION 0x0009
|
||||
|
||||
#define EFI_HOB_TYPE_HANDOFF 0x0001
|
||||
#define EFI_HOB_TYPE_MEMORY_ALLOCATION 0x0002
|
||||
#define EFI_HOB_TYPE_RESOURCE_DESCRIPTOR 0x0003
|
||||
#define EFI_HOB_TYPE_GUID_EXTENSION 0x0004
|
||||
#define EFI_HOB_TYPE_FV 0x0005
|
||||
#define EFI_HOB_TYPE_CPU 0x0006
|
||||
#define EFI_HOB_TYPE_MEMORY_POOL 0x0007
|
||||
#define EFI_HOB_TYPE_FV2 0x0009
|
||||
#define EFI_HOB_TYPE_LOAD_PEIM_UNUSED 0x000A
|
||||
#define EFI_HOB_TYPE_UEFI_CAPSULE 0x000B
|
||||
#define EFI_HOB_TYPE_FV3 0x000C
|
||||
#define EFI_HOB_TYPE_UNUSED 0xFFFE
|
||||
#define EFI_HOB_TYPE_END_OF_HOB_LIST 0xFFFF
|
||||
|
||||
typedef struct {
|
||||
uint16_t HobType;
|
||||
uint16_t HobLength;
|
||||
uint32_t Reserved;
|
||||
} EFI_HOB_GENERIC_HEADER;
|
||||
|
||||
typedef uint64_t EFI_PHYSICAL_ADDRESS;
|
||||
typedef uint32_t EFI_BOOT_MODE;
|
||||
|
||||
typedef struct {
|
||||
EFI_HOB_GENERIC_HEADER Header;
|
||||
uint32_t Version;
|
||||
EFI_BOOT_MODE BootMode;
|
||||
EFI_PHYSICAL_ADDRESS EfiMemoryTop;
|
||||
EFI_PHYSICAL_ADDRESS EfiMemoryBottom;
|
||||
EFI_PHYSICAL_ADDRESS EfiFreeMemoryTop;
|
||||
EFI_PHYSICAL_ADDRESS EfiFreeMemoryBottom;
|
||||
EFI_PHYSICAL_ADDRESS EfiEndOfHobList;
|
||||
} EFI_HOB_HANDOFF_INFO_TABLE;
|
||||
|
||||
#define EFI_RESOURCE_SYSTEM_MEMORY 0x00000000
|
||||
#define EFI_RESOURCE_MEMORY_MAPPED_IO 0x00000001
|
||||
#define EFI_RESOURCE_IO 0x00000002
|
||||
#define EFI_RESOURCE_FIRMWARE_DEVICE 0x00000003
|
||||
#define EFI_RESOURCE_MEMORY_MAPPED_IO_PORT 0x00000004
|
||||
#define EFI_RESOURCE_MEMORY_RESERVED 0x00000005
|
||||
#define EFI_RESOURCE_IO_RESERVED 0x00000006
|
||||
#define EFI_RESOURCE_MEMORY_UNACCEPTED 0x00000007
|
||||
#define EFI_RESOURCE_MAX_MEMORY_TYPE 0x00000008
|
||||
|
||||
#define EFI_RESOURCE_ATTRIBUTE_PRESENT 0x00000001
|
||||
#define EFI_RESOURCE_ATTRIBUTE_INITIALIZED 0x00000002
|
||||
#define EFI_RESOURCE_ATTRIBUTE_TESTED 0x00000004
|
||||
#define EFI_RESOURCE_ATTRIBUTE_SINGLE_BIT_ECC 0x00000008
|
||||
#define EFI_RESOURCE_ATTRIBUTE_MULTIPLE_BIT_ECC 0x00000010
|
||||
#define EFI_RESOURCE_ATTRIBUTE_ECC_RESERVED_1 0x00000020
|
||||
#define EFI_RESOURCE_ATTRIBUTE_ECC_RESERVED_2 0x00000040
|
||||
#define EFI_RESOURCE_ATTRIBUTE_READ_PROTECTED 0x00000080
|
||||
#define EFI_RESOURCE_ATTRIBUTE_WRITE_PROTECTED 0x00000100
|
||||
#define EFI_RESOURCE_ATTRIBUTE_EXECUTION_PROTECTED 0x00000200
|
||||
#define EFI_RESOURCE_ATTRIBUTE_UNCACHEABLE 0x00000400
|
||||
#define EFI_RESOURCE_ATTRIBUTE_WRITE_COMBINEABLE 0x00000800
|
||||
#define EFI_RESOURCE_ATTRIBUTE_WRITE_THROUGH_CACHEABLE 0x00001000
|
||||
#define EFI_RESOURCE_ATTRIBUTE_WRITE_BACK_CACHEABLE 0x00002000
|
||||
#define EFI_RESOURCE_ATTRIBUTE_16_BIT_IO 0x00004000
|
||||
#define EFI_RESOURCE_ATTRIBUTE_32_BIT_IO 0x00008000
|
||||
#define EFI_RESOURCE_ATTRIBUTE_64_BIT_IO 0x00010000
|
||||
#define EFI_RESOURCE_ATTRIBUTE_UNCACHED_EXPORTED 0x00020000
|
||||
#define EFI_RESOURCE_ATTRIBUTE_READ_ONLY_PROTECTED 0x00040000
|
||||
#define EFI_RESOURCE_ATTRIBUTE_READ_ONLY_PROTECTABLE 0x00080000
|
||||
#define EFI_RESOURCE_ATTRIBUTE_READ_PROTECTABLE 0x00100000
|
||||
#define EFI_RESOURCE_ATTRIBUTE_WRITE_PROTECTABLE 0x00200000
|
||||
#define EFI_RESOURCE_ATTRIBUTE_EXECUTION_PROTECTABLE 0x00400000
|
||||
#define EFI_RESOURCE_ATTRIBUTE_PERSISTENT 0x00800000
|
||||
#define EFI_RESOURCE_ATTRIBUTE_PERSISTABLE 0x01000000
|
||||
#define EFI_RESOURCE_ATTRIBUTE_MORE_RELIABLE 0x02000000
|
||||
|
||||
typedef uint32_t EFI_RESOURCE_TYPE;
|
||||
typedef uint32_t EFI_RESOURCE_ATTRIBUTE_TYPE;
|
||||
|
||||
typedef struct {
|
||||
EFI_HOB_GENERIC_HEADER Header;
|
||||
EFI_GUID Owner;
|
||||
EFI_RESOURCE_TYPE ResourceType;
|
||||
EFI_RESOURCE_ATTRIBUTE_TYPE ResourceAttribute;
|
||||
EFI_PHYSICAL_ADDRESS PhysicalStart;
|
||||
uint64_t ResourceLength;
|
||||
} EFI_HOB_RESOURCE_DESCRIPTOR;
|
||||
|
||||
typedef struct {
|
||||
EFI_HOB_GENERIC_HEADER Header;
|
||||
EFI_GUID Name;
|
||||
|
||||
/* guid specific data follows */
|
||||
} EFI_HOB_GUID_TYPE;
|
||||
|
||||
typedef struct {
|
||||
EFI_HOB_GENERIC_HEADER Header;
|
||||
EFI_PHYSICAL_ADDRESS BaseAddress;
|
||||
uint64_t Length;
|
||||
} EFI_HOB_FIRMWARE_VOLUME;
|
||||
|
||||
typedef struct {
|
||||
EFI_HOB_GENERIC_HEADER Header;
|
||||
EFI_PHYSICAL_ADDRESS BaseAddress;
|
||||
uint64_t Length;
|
||||
EFI_GUID FvName;
|
||||
EFI_GUID FileName;
|
||||
} EFI_HOB_FIRMWARE_VOLUME2;
|
||||
|
||||
typedef struct {
|
||||
EFI_HOB_GENERIC_HEADER Header;
|
||||
EFI_PHYSICAL_ADDRESS BaseAddress;
|
||||
uint64_t Length;
|
||||
uint32_t AuthenticationStatus;
|
||||
bool ExtractedFv;
|
||||
EFI_GUID FvName;
|
||||
EFI_GUID FileName;
|
||||
} EFI_HOB_FIRMWARE_VOLUME3;
|
||||
|
||||
typedef struct {
|
||||
EFI_HOB_GENERIC_HEADER Header;
|
||||
uint8_t SizeOfMemorySpace;
|
||||
uint8_t SizeOfIoSpace;
|
||||
uint8_t Reserved[6];
|
||||
} EFI_HOB_CPU;
|
||||
|
||||
typedef struct {
|
||||
EFI_HOB_GENERIC_HEADER Header;
|
||||
} EFI_HOB_MEMORY_POOL;
|
||||
|
||||
typedef struct {
|
||||
EFI_HOB_GENERIC_HEADER Header;
|
||||
|
||||
EFI_PHYSICAL_ADDRESS BaseAddress;
|
||||
uint64_t Length;
|
||||
} EFI_HOB_UEFI_CAPSULE;
|
||||
|
||||
#define EFI_HOB_OWNER_ZERO \
|
||||
((EFI_GUID){ 0x00000000, 0x0000, 0x0000, \
|
||||
{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } })
|
||||
|
||||
#endif
|
@@ -156,6 +156,7 @@ typedef struct DumpState {
|
||||
MemoryMappingList list;
|
||||
bool resume;
|
||||
bool detached;
|
||||
bool encrypted_guest;
|
||||
hwaddr memory_offset;
|
||||
int fd;
|
||||
|
||||
@@ -173,6 +174,7 @@ typedef struct DumpState {
|
||||
/* Elf dump related data */
|
||||
uint32_t phdr_num;
|
||||
uint32_t shdr_num;
|
||||
|
||||
ssize_t note_size;
|
||||
hwaddr shdr_offset;
|
||||
hwaddr phdr_offset;
|
||||
|
@@ -71,7 +71,8 @@ struct HostMemoryBackend {
|
||||
DECLARE_BITMAP(host_nodes, MAX_NODES + 1);
|
||||
HostMemPolicy policy;
|
||||
|
||||
MemoryRegion mr;
|
||||
MemoryRegion base_mr;
|
||||
MemoryRegion *mr;
|
||||
};
|
||||
|
||||
bool host_memory_backend_mr_inited(HostMemoryBackend *backend);
|
||||
|
@@ -46,6 +46,8 @@ extern bool kvm_readonly_mem_allowed;
|
||||
extern bool kvm_direct_msi_allowed;
|
||||
extern bool kvm_ioeventfd_any_length_allowed;
|
||||
extern bool kvm_msi_use_devid;
|
||||
extern bool kvm_ram_default_shared;
|
||||
extern int kvm_vm_type;
|
||||
|
||||
#define kvm_enabled() (kvm_allowed)
|
||||
/**
|
||||
@@ -371,6 +373,7 @@ int kvm_arch_put_registers(CPUState *cpu, int level);
|
||||
|
||||
int kvm_arch_init(MachineState *ms, KVMState *s);
|
||||
|
||||
int kvm_arch_pre_create_vcpu(CPUState *cpu);
|
||||
int kvm_arch_init_vcpu(CPUState *cpu);
|
||||
int kvm_arch_destroy_vcpu(CPUState *cpu);
|
||||
|
||||
@@ -582,4 +585,17 @@ bool kvm_arch_cpu_check_are_resettable(void);
|
||||
bool kvm_dirty_ring_enabled(void);
|
||||
|
||||
uint32_t kvm_dirty_ring_size(void);
|
||||
|
||||
int kvm_encrypt_reg_region(hwaddr start, hwaddr size, bool reg_region);
|
||||
int kvm_convert_memory(hwaddr start, hwaddr size,
|
||||
bool shared_to_private, int cpu_index);
|
||||
|
||||
typedef void (*set_memory_region_debug_ops)(void *handle, MemoryRegion *mr);
|
||||
void kvm_setup_set_memory_region_debug_ops(struct KVMState *s,
|
||||
set_memory_region_debug_ops new_ops);
|
||||
void kvm_set_memory_region_debug_ops(void *handle, MemoryRegion *mr);
|
||||
|
||||
void kvm_encrypted_guest_set_memory_region_debug_ops(void *handle,
|
||||
MemoryRegion *mr);
|
||||
hwaddr kvm_encrypted_guest_mask_gpa_stolen_bit(hwaddr gpa);
|
||||
#endif
|
||||
|
@@ -29,6 +29,8 @@ typedef struct KVMSlot
|
||||
int as_id;
|
||||
/* Cache of the offset in ram address space */
|
||||
ram_addr_t ram_start_offset;
|
||||
int fd;
|
||||
hwaddr ofs;
|
||||
} KVMSlot;
|
||||
|
||||
typedef struct KVMMemoryListener {
|
||||
@@ -110,6 +112,8 @@ struct KVMState
|
||||
struct KVMDirtyRingReaper reaper;
|
||||
NotifyVmexitOption notify_vmexit;
|
||||
uint32_t notify_window;
|
||||
|
||||
set_memory_region_debug_ops set_mr_debug_ops;
|
||||
};
|
||||
|
||||
void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
|
||||
|
6
include/sysemu/tdx.h
Normal file
6
include/sysemu/tdx.h
Normal file
@@ -0,0 +1,6 @@
|
||||
#ifndef QEMU_TDX_H
|
||||
#define QEMU_TDX_H
|
||||
|
||||
bool tdx_debug_enabled(void);
|
||||
|
||||
#endif
|
@@ -311,6 +311,9 @@ int qio_channel_socket_dgram_sync(QIOChannelSocket *ioc,
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (localAddr && localAddr->type == SOCKET_ADDRESS_TYPE_UNIX)
|
||||
ioc->unix_datagram = true;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -414,6 +417,41 @@ qio_channel_socket_accept(QIOChannelSocket *ioc,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void qio_channel_socket_set_dgram_send_address(QIOChannelSocket *ioc,
|
||||
const struct UnixSocketAddress *un_addr)
|
||||
{
|
||||
if (!ioc->unix_datagram)
|
||||
return;
|
||||
|
||||
if (!un_addr)
|
||||
return;
|
||||
|
||||
g_free(ioc->sendtoDgramAddr.path);
|
||||
memcpy(&ioc->sendtoDgramAddr, un_addr, sizeof(*un_addr));
|
||||
ioc->sendtoDgramAddr.path = g_strdup(un_addr->path);
|
||||
}
|
||||
|
||||
void qio_channel_socket_get_dgram_recv_address(QIOChannelSocket *ioc,
|
||||
struct UnixSocketAddress *un_addr)
|
||||
{
|
||||
if (!ioc->unix_datagram)
|
||||
return;
|
||||
|
||||
if (!un_addr || !un_addr->path)
|
||||
return;
|
||||
|
||||
if (!ioc->recvfromDgramAddr.path)
|
||||
return;
|
||||
|
||||
#if defined(CONFIG_LINUX)
|
||||
un_addr->has_abstract = ioc->recvfromDgramAddr.has_abstract;
|
||||
un_addr->abstract = ioc->recvfromDgramAddr.abstract;
|
||||
un_addr->has_tight = ioc->recvfromDgramAddr.has_tight;
|
||||
un_addr->tight = ioc->recvfromDgramAddr.tight;
|
||||
#endif /* defined(CONFIG_LINUX) */
|
||||
strcpy(un_addr->path, ioc->recvfromDgramAddr.path);
|
||||
}
|
||||
|
||||
static void qio_channel_socket_init(Object *obj)
|
||||
{
|
||||
QIOChannelSocket *ioc = QIO_CHANNEL_SOCKET(obj);
|
||||
@@ -441,6 +479,9 @@ static void qio_channel_socket_finalize(Object *obj)
|
||||
closesocket(ioc->fd);
|
||||
ioc->fd = -1;
|
||||
}
|
||||
|
||||
g_free(ioc->sendtoDgramAddr.path);
|
||||
g_free(ioc->recvfromDgramAddr.path);
|
||||
}
|
||||
|
||||
|
||||
@@ -490,7 +531,6 @@ static void qio_channel_socket_copy_fds(struct msghdr *msg,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
|
||||
const struct iovec *iov,
|
||||
size_t niov,
|
||||
@@ -503,6 +543,7 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
|
||||
struct msghdr msg = { NULL, };
|
||||
char control[CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)];
|
||||
int sflags = 0;
|
||||
struct sockaddr_un addr_un;
|
||||
|
||||
memset(control, 0, CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS));
|
||||
|
||||
@@ -517,6 +558,12 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
|
||||
|
||||
}
|
||||
|
||||
if (sioc->unix_datagram) {
|
||||
memset(&addr_un, 0, sizeof(addr_un));
|
||||
msg.msg_name = &addr_un;
|
||||
msg.msg_namelen = sizeof(addr_un);
|
||||
}
|
||||
|
||||
retry:
|
||||
ret = recvmsg(sioc->fd, &msg, sflags);
|
||||
if (ret < 0) {
|
||||
@@ -536,6 +583,17 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
|
||||
qio_channel_socket_copy_fds(&msg, fds, nfds);
|
||||
}
|
||||
|
||||
if (ret > 0 && sioc->unix_datagram) {
|
||||
g_free(sioc->recvfromDgramAddr.path);
|
||||
if (sockaddr_is_abstract(&addr_un)) {
|
||||
sioc->recvfromDgramAddr.path = g_strdup(addr_un.sun_path + 1);
|
||||
sioc->recvfromDgramAddr.abstract = true;
|
||||
} else {
|
||||
sioc->recvfromDgramAddr.path = g_strdup(addr_un.sun_path);
|
||||
sioc->recvfromDgramAddr.abstract = false;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -554,6 +612,8 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
|
||||
size_t fdsize = sizeof(int) * nfds;
|
||||
struct cmsghdr *cmsg;
|
||||
int sflags = 0;
|
||||
struct sockaddr_un addr;
|
||||
size_t addr_len;
|
||||
|
||||
memset(control, 0, CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS));
|
||||
|
||||
@@ -590,6 +650,16 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
|
||||
#endif
|
||||
}
|
||||
|
||||
if (sioc->unix_datagram && sioc->sendtoDgramAddr.path) {
|
||||
ret = prepare_unix_sockaddr(&sioc->sendtoDgramAddr,
|
||||
&addr, &addr_len, errp);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
msg.msg_name = &addr;
|
||||
msg.msg_namelen = addr_len;
|
||||
}
|
||||
|
||||
retry:
|
||||
ret = sendmsg(sioc->fd, &msg, sflags);
|
||||
if (ret <= 0) {
|
||||
@@ -605,6 +675,11 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
|
||||
return -1;
|
||||
}
|
||||
break;
|
||||
case ENOENT:
|
||||
case ECONNREFUSED:
|
||||
if (sioc->unix_datagram) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
error_setg_errno(errp, errno,
|
||||
|
@@ -195,12 +195,13 @@ static void qio_channel_tls_handshake_task(QIOChannelTLS *ioc,
|
||||
}
|
||||
|
||||
trace_qio_channel_tls_handshake_pending(ioc, status);
|
||||
qio_channel_add_watch_full(ioc->master,
|
||||
condition,
|
||||
qio_channel_tls_handshake_io,
|
||||
data,
|
||||
NULL,
|
||||
context);
|
||||
ioc->hs_ioc_tag =
|
||||
qio_channel_add_watch_full(ioc->master,
|
||||
condition,
|
||||
qio_channel_tls_handshake_io,
|
||||
data,
|
||||
NULL,
|
||||
context);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -215,6 +216,7 @@ static gboolean qio_channel_tls_handshake_io(QIOChannel *ioc,
|
||||
QIOChannelTLS *tioc = QIO_CHANNEL_TLS(
|
||||
qio_task_get_source(task));
|
||||
|
||||
tioc->hs_ioc_tag = 0;
|
||||
g_free(data);
|
||||
qio_channel_tls_handshake_task(tioc, task, context);
|
||||
|
||||
@@ -374,6 +376,10 @@ static int qio_channel_tls_close(QIOChannel *ioc,
|
||||
{
|
||||
QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc);
|
||||
|
||||
if (tioc->hs_ioc_tag) {
|
||||
g_clear_handle_id(&tioc->hs_ioc_tag, g_source_remove);
|
||||
}
|
||||
|
||||
return qio_channel_close(tioc->master, errp);
|
||||
}
|
||||
|
||||
|
@@ -886,8 +886,11 @@ __SYSCALL(__NR_futex_waitv, sys_futex_waitv)
|
||||
#define __NR_set_mempolicy_home_node 450
|
||||
__SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node)
|
||||
|
||||
#define __NR_memfd_restricted 451
|
||||
__SYSCALL(__NR_memfd_restricted, sys_memfd_restricted)
|
||||
|
||||
#undef __NR_syscalls
|
||||
#define __NR_syscalls 451
|
||||
#define __NR_syscalls 452
|
||||
|
||||
/*
|
||||
* 32 bit systems traditionally used different
|
||||
|
@@ -532,4 +532,189 @@ struct kvm_pmu_event_filter {
|
||||
#define KVM_VCPU_TSC_CTRL 0 /* control group for the timestamp counter (TSC) */
|
||||
#define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */
|
||||
|
||||
#define KVM_X86_DEFAULT_VM 0
|
||||
#define KVM_X86_TDX_VM 1
|
||||
|
||||
#define TDX_MIG_EXPORT_TRACK_F_IN_ORDER_DONE (1UL << 63)
|
||||
|
||||
/* Trust Domain eXtension sub-ioctl() commands. */
|
||||
enum kvm_tdx_cmd_id {
|
||||
KVM_TDX_CAPABILITIES = 0,
|
||||
KVM_TDX_INIT_VM,
|
||||
KVM_TDX_INIT_VCPU,
|
||||
KVM_TDX_INIT_MEM_REGION,
|
||||
KVM_TDX_FINALIZE_VM,
|
||||
KVM_TDX_SERVTD_PREBIND,
|
||||
KVM_TDX_SERVTD_BIND,
|
||||
KVM_TDX_SET_MIGRATION_INFO,
|
||||
KVM_TDX_GET_MIGRATION_INFO,
|
||||
KVM_TDX_MIG_EXPORT_STATE_IMMUTABLE,
|
||||
KVM_TDX_MIG_IMPORT_STATE_IMMUTABLE,
|
||||
KVM_TDX_MIG_EXPORT_MEM,
|
||||
KVM_TDX_MIG_IMPORT_MEM,
|
||||
KVM_TDX_MIG_EXPORT_TRACK,
|
||||
KVM_TDX_MIG_IMPORT_TRACK,
|
||||
KVM_TDX_MIG_EXPORT_PAUSE,
|
||||
KVM_TDX_MIG_EXPORT_STATE_TD,
|
||||
KVM_TDX_MIG_IMPORT_STATE_TD,
|
||||
KVM_TDX_MIG_EXPORT_STATE_VP,
|
||||
KVM_TDX_MIG_IMPORT_STATE_VP,
|
||||
KVM_TDX_MIG_EXPORT_ABORT,
|
||||
KVM_TDX_MIG_IMPORT_END,
|
||||
KVM_TDX_SET_VTPM_ENABLED,
|
||||
|
||||
KVM_TDX_CMD_NR_MAX,
|
||||
};
|
||||
|
||||
/*
|
||||
* Flag set from userspace to indicate the initialization it for a migratino
|
||||
* destination TD, and some initialition work (e.g. tdh.mng.init) should be
|
||||
* skipped.
|
||||
*/
|
||||
#define KVM_TDX_INIT_VM_F_POST_INIT 1
|
||||
|
||||
struct kvm_tdx_cmd {
|
||||
/* enum kvm_tdx_cmd_id */
|
||||
__u32 id;
|
||||
/* flags for sub-commend. If sub-command doesn't use this, set zero. */
|
||||
__u32 flags;
|
||||
/*
|
||||
* data for each sub-command. An immediate or a pointer to the actual
|
||||
* data in process virtual address. If sub-command doesn't use it,
|
||||
* set zero.
|
||||
*/
|
||||
__u64 data;
|
||||
/*
|
||||
* Auxiliary error code. The sub-command may return TDX SEAMCALL
|
||||
* status code in addition to -Exxx.
|
||||
* Defined for consistency with struct kvm_sev_cmd.
|
||||
*/
|
||||
__u64 error;
|
||||
/* Reserved: Defined for consistency with struct kvm_sev_cmd. */
|
||||
__u64 unused;
|
||||
};
|
||||
|
||||
struct kvm_tdx_cpuid_config {
|
||||
__u32 leaf;
|
||||
__u32 sub_leaf;
|
||||
__u32 eax;
|
||||
__u32 ebx;
|
||||
__u32 ecx;
|
||||
__u32 edx;
|
||||
};
|
||||
|
||||
struct kvm_tdx_capabilities {
|
||||
__u64 attrs_fixed0;
|
||||
__u64 attrs_fixed1;
|
||||
__u64 xfam_fixed0;
|
||||
__u64 xfam_fixed1;
|
||||
|
||||
__u32 nr_cpuid_configs;
|
||||
__u32 padding;
|
||||
struct kvm_tdx_cpuid_config cpuid_configs[0];
|
||||
};
|
||||
|
||||
struct kvm_tdx_init_vm {
|
||||
__u64 attributes;
|
||||
__u64 mrconfigid[6]; /* sha384 digest */
|
||||
__u64 mrowner[6]; /* sha384 digest */
|
||||
__u64 mrownerconfig[6]; /* sha348 digest */
|
||||
union {
|
||||
/*
|
||||
* KVM_TDX_INIT_VM is called before vcpu creation, thus before
|
||||
* KVM_SET_CPUID2. CPUID configurations needs to be passed.
|
||||
*
|
||||
* This configuration supersedes KVM_SET_CPUID{,2}.
|
||||
* The user space VMM, e.g. qemu, should make them consistent
|
||||
* with this values.
|
||||
* sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES(256)
|
||||
* = 8KB.
|
||||
*/
|
||||
struct {
|
||||
struct kvm_cpuid2 cpuid;
|
||||
/* 8KB with KVM_MAX_CPUID_ENTRIES. */
|
||||
struct kvm_cpuid_entry2 entries[];
|
||||
};
|
||||
/*
|
||||
* For future extensibility.
|
||||
* The size(struct kvm_tdx_init_vm) = 16KB.
|
||||
* This should be enough given sizeof(TD_PARAMS) = 1024
|
||||
*/
|
||||
__u64 reserved[2029];
|
||||
};
|
||||
};
|
||||
|
||||
#define KVM_TDX_MEASURE_MEMORY_REGION (1UL << 0)
|
||||
|
||||
struct kvm_tdx_init_mem_region {
|
||||
__u64 source_addr;
|
||||
__u64 gpa;
|
||||
__u64 nr_pages;
|
||||
};
|
||||
|
||||
struct kvm_rw_memory {
|
||||
/* This can be GPA or HVA */
|
||||
__u64 addr;
|
||||
|
||||
/* This will be updated to completed
|
||||
* reading/writing byte size when the ioctl return
|
||||
*/
|
||||
__u64 len;
|
||||
__u64 ubuf;
|
||||
};
|
||||
|
||||
typedef enum kvm_tdx_servtd_type {
|
||||
KVM_TDX_SERVTD_TYPE_MIGTD = 0,
|
||||
|
||||
KVM_TDX_SERVTD_TYPE_MAX,
|
||||
} kvm_tdx_servtd_type_t;
|
||||
|
||||
/* A SHA384 hash takes up 48 bytes (Table 5.7, TDX module ABI Spec) */
|
||||
#define KVM_TDX_SERVTD_HASH_SIZE 48
|
||||
|
||||
struct kvm_tdx_servtd {
|
||||
#define KVM_TDX_SERVTD_VERSION 0
|
||||
__u8 version;
|
||||
__u8 pad[5];
|
||||
__u16 type;
|
||||
__u64 attr;
|
||||
union {
|
||||
/* KVM_TDX_SERVTD_BIND */
|
||||
__u32 pid;
|
||||
/* KVM_TDX_SERVTD_PREBIND */
|
||||
__u8 hash[KVM_TDX_SERVTD_HASH_SIZE];
|
||||
};
|
||||
};
|
||||
|
||||
struct kvm_tdx_set_migration_info {
|
||||
#define KVM_TDX_SET_MIGRATION_INFO_VERSION 0
|
||||
__u8 version;
|
||||
__u8 is_src;
|
||||
__u8 pad[2];
|
||||
__u32 vsock_port;
|
||||
};
|
||||
|
||||
struct kvm_tdx_get_migration_info {
|
||||
#define KVM_TDX_GET_MIGRATION_INFO_VERSION 0
|
||||
__u8 version;
|
||||
__u8 premig_done;
|
||||
__u8 pad[6];
|
||||
};
|
||||
|
||||
#define KVM_DEV_TDX_MIG_ATTR 0x1
|
||||
|
||||
struct kvm_dev_tdx_mig_attr {
|
||||
#define KVM_DEV_TDX_MIG_ATTR_VERSION 0
|
||||
__u32 version;
|
||||
/* 4KB buffer can hold 512 entries at most */
|
||||
#define TDX_MIG_BUF_LIST_PAGES_MAX 512
|
||||
__u32 buf_list_pages;
|
||||
__u32 max_migs;
|
||||
};
|
||||
|
||||
#define TDX_MIG_STREAM_MBMD_MAP_OFFSET 0
|
||||
#define TDX_MIG_STREAM_GPA_LIST_MAP_OFFSET 1
|
||||
#define TDX_MIG_STREAM_MAC_LIST_MAP_OFFSET 2
|
||||
#define TDX_MIG_STREAM_BUF_LIST_MAP_OFFSET 4
|
||||
|
||||
#endif /* _ASM_X86_KVM_H */
|
||||
|
@@ -441,6 +441,7 @@
|
||||
#define __NR_process_mrelease 448
|
||||
#define __NR_futex_waitv 449
|
||||
#define __NR_set_mempolicy_home_node 450
|
||||
#define __NR_memfd_restricted 451
|
||||
|
||||
|
||||
#endif /* _ASM_UNISTD_32_H */
|
||||
|
@@ -363,6 +363,7 @@
|
||||
#define __NR_process_mrelease 448
|
||||
#define __NR_futex_waitv 449
|
||||
#define __NR_set_mempolicy_home_node 450
|
||||
#define __NR_memfd_restricted 451
|
||||
|
||||
|
||||
#endif /* _ASM_UNISTD_64_H */
|
||||
|
@@ -316,6 +316,7 @@
|
||||
#define __NR_process_mrelease (__X32_SYSCALL_BIT + 448)
|
||||
#define __NR_futex_waitv (__X32_SYSCALL_BIT + 449)
|
||||
#define __NR_set_mempolicy_home_node (__X32_SYSCALL_BIT + 450)
|
||||
#define __NR_memfd_restricted (__X32_SYSCALL_BIT + 451)
|
||||
#define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512)
|
||||
#define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513)
|
||||
#define __NR_ioctl (__X32_SYSCALL_BIT + 514)
|
||||
|
@@ -103,6 +103,15 @@ struct kvm_userspace_memory_region {
|
||||
__u64 userspace_addr; /* start of the userspace allocated memory */
|
||||
};
|
||||
|
||||
struct kvm_userspace_memory_region_ext {
|
||||
struct kvm_userspace_memory_region region;
|
||||
__u64 restricted_offset;
|
||||
__u32 restricted_fd;
|
||||
__u32 pad1;
|
||||
__u64 pad2[14];
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* The bit 0 ~ bit 15 of kvm_memory_region::flags are visible for userspace,
|
||||
* other bits are reserved for kvm internal use which are defined in
|
||||
@@ -110,6 +119,7 @@ struct kvm_userspace_memory_region {
|
||||
*/
|
||||
#define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0)
|
||||
#define KVM_MEM_READONLY (1UL << 1)
|
||||
#define KVM_MEM_PRIVATE (1UL << 2)
|
||||
|
||||
/* for KVM_IRQ_LINE */
|
||||
struct kvm_irq_level {
|
||||
@@ -231,6 +241,60 @@ struct kvm_xen_exit {
|
||||
} u;
|
||||
};
|
||||
|
||||
struct kvm_tdx_exit {
|
||||
#define KVM_EXIT_TDX_VMCALL 1
|
||||
__u32 type;
|
||||
__u32 pad;
|
||||
|
||||
union {
|
||||
struct kvm_tdx_vmcall {
|
||||
/*
|
||||
* Guest-Host-Communication Interface for TDX spec
|
||||
* defines the ABI for TDG.VP.VMCALL.
|
||||
*/
|
||||
|
||||
/* Input parameters: guest -> VMM */
|
||||
__u64 type; /* r10 */
|
||||
__u64 subfunction; /* r11 */
|
||||
__u64 reg_mask; /* rcx */
|
||||
/*
|
||||
* Subfunction specific.
|
||||
* Registers are used in this order to pass input
|
||||
* arguments. r12=arg0, r13=arg1, etc.
|
||||
*/
|
||||
__u64 in_r12;
|
||||
__u64 in_r13;
|
||||
__u64 in_r14;
|
||||
__u64 in_r15;
|
||||
__u64 in_rbx;
|
||||
__u64 in_rdi;
|
||||
__u64 in_rsi;
|
||||
__u64 in_r8;
|
||||
__u64 in_r9;
|
||||
__u64 in_rdx;
|
||||
|
||||
/* Output parameters: VMM -> guest */
|
||||
__u64 status_code; /* r10 */
|
||||
/*
|
||||
* Subfunction specific.
|
||||
* Registers are used in this order to output return
|
||||
* values. r11=ret0, r12=ret1, etc.
|
||||
*/
|
||||
__u64 out_r11;
|
||||
__u64 out_r12;
|
||||
__u64 out_r13;
|
||||
__u64 out_r14;
|
||||
__u64 out_r15;
|
||||
__u64 out_rbx;
|
||||
__u64 out_rdi;
|
||||
__u64 out_rsi;
|
||||
__u64 out_r8;
|
||||
__u64 out_r9;
|
||||
__u64 out_rdx;
|
||||
} vmcall;
|
||||
} u;
|
||||
};
|
||||
|
||||
#define KVM_S390_GET_SKEYS_NONE 1
|
||||
#define KVM_S390_SKEYS_MAX 1048576
|
||||
|
||||
@@ -273,6 +337,9 @@ struct kvm_xen_exit {
|
||||
#define KVM_EXIT_RISCV_CSR 36
|
||||
#define KVM_EXIT_NOTIFY 37
|
||||
|
||||
#define KVM_EXIT_TDX 50
|
||||
#define KVM_EXIT_MEMORY_FAULT 100
|
||||
|
||||
/* For KVM_EXIT_INTERNAL_ERROR */
|
||||
/* Emulate instruction failed. */
|
||||
#define KVM_INTERNAL_ERROR_EMULATION 1
|
||||
@@ -508,6 +575,15 @@ struct kvm_run {
|
||||
#define KVM_NOTIFY_CONTEXT_INVALID (1 << 0)
|
||||
__u32 flags;
|
||||
} notify;
|
||||
/* KVM_EXIT_MEMORY_FAULT */
|
||||
struct {
|
||||
#define KVM_MEMORY_EXIT_FLAG_PRIVATE (1ULL << 0)
|
||||
__u64 flags;
|
||||
__u64 gpa;
|
||||
__u64 size;
|
||||
} memory;
|
||||
/* KVM_EXIT_TDX_VMCALL */
|
||||
struct kvm_tdx_exit tdx;
|
||||
/* Fix the size of the union. */
|
||||
char padding[256];
|
||||
};
|
||||
@@ -1175,6 +1251,15 @@ struct kvm_ppc_resize_hpt {
|
||||
#define KVM_CAP_VM_DISABLE_NX_HUGE_PAGES 220
|
||||
#define KVM_CAP_S390_ZPCI_OP 221
|
||||
#define KVM_CAP_S390_CPU_TOPOLOGY 222
|
||||
#define KVM_CAP_DIRTY_LOG_RING_ACQ_REL 223
|
||||
#define KVM_CAP_S390_PROTECTED_ASYNC_DISABLE 224
|
||||
|
||||
/* TODO: remove this workaround to avoid CAP number conflict in the upstream. */
|
||||
#define KVM_CAP_MEMORY_ATTRIBUTES 500
|
||||
|
||||
#define KVM_CAP_ENCRYPT_MEMORY_DEBUG 300
|
||||
|
||||
#define KVM_CAP_VM_TYPES 1000
|
||||
|
||||
#ifdef KVM_CAP_IRQ_ROUTING
|
||||
|
||||
@@ -1426,6 +1511,8 @@ enum kvm_device_type {
|
||||
#define KVM_DEV_TYPE_XIVE KVM_DEV_TYPE_XIVE
|
||||
KVM_DEV_TYPE_ARM_PV_TIME,
|
||||
#define KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_ARM_PV_TIME
|
||||
KVM_DEV_TYPE_TDX_MIG_STREAM,
|
||||
#define KVM_DEV_TYPE_TDX_MIG_STREAM KVM_DEV_TYPE_TDX_MIG_STREAM
|
||||
KVM_DEV_TYPE_MAX,
|
||||
};
|
||||
|
||||
@@ -1854,6 +1941,10 @@ struct kvm_xen_vcpu_attr {
|
||||
#define KVM_XEN_VCPU_ATTR_TYPE_TIMER 0x7
|
||||
#define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR 0x8
|
||||
|
||||
/* Read/write encrypted guest memory, for guest debugging support in QEMU*/
|
||||
#define KVM_MEMORY_ENCRYPT_READ_MEMORY _IOWR(KVMIO, 0xcc, struct kvm_rw_memory)
|
||||
#define KVM_MEMORY_ENCRYPT_WRITE_MEMORY _IOWR(KVMIO, 0xcd, struct kvm_rw_memory)
|
||||
|
||||
/* Secure Encrypted Virtualization command */
|
||||
enum sev_cmd_id {
|
||||
/* Guest initialization commands */
|
||||
@@ -2225,4 +2316,20 @@ struct kvm_s390_zpci_op {
|
||||
/* flags for kvm_s390_zpci_op->u.reg_aen.flags */
|
||||
#define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0)
|
||||
|
||||
/* Available with KVM_CAP_MEMORY_ATTRIBUTES */
|
||||
#define KVM_GET_SUPPORTED_MEMORY_ATTRIBUTES _IOR(KVMIO, 0xd2, __u64)
|
||||
#define KVM_SET_MEMORY_ATTRIBUTES _IOWR(KVMIO, 0xd3, struct kvm_memory_attributes)
|
||||
|
||||
struct kvm_memory_attributes {
|
||||
__u64 address;
|
||||
__u64 size;
|
||||
__u64 attributes;
|
||||
__u64 flags;
|
||||
};
|
||||
|
||||
#define KVM_MEMORY_ATTRIBUTE_READ (1ULL << 0)
|
||||
#define KVM_MEMORY_ATTRIBUTE_WRITE (1ULL << 1)
|
||||
#define KVM_MEMORY_ATTRIBUTE_EXECUTE (1ULL << 2)
|
||||
#define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3)
|
||||
|
||||
#endif /* __LINUX_KVM_H */
|
||||
|
@@ -6454,8 +6454,8 @@ static abi_long do_prctl_inval1(CPUArchState *env, abi_long arg2)
|
||||
#define do_prctl_sme_set_vl do_prctl_inval1
|
||||
#endif
|
||||
|
||||
static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2,
|
||||
abi_long arg3, abi_long arg4, abi_long arg5)
|
||||
static abi_long do_prctl(CPUArchState *env, abi_ulong option, abi_ulong arg2,
|
||||
abi_ulong arg3, abi_ulong arg4, abi_ulong arg5)
|
||||
{
|
||||
abi_long ret;
|
||||
|
||||
@@ -8102,6 +8102,9 @@ static int open_self_stat(CPUArchState *cpu_env, int fd)
|
||||
gchar *bin = g_strrstr(ts->bprm->argv[0], "/");
|
||||
bin = bin ? bin + 1 : ts->bprm->argv[0];
|
||||
g_string_printf(buf, "(%.15s) ", bin);
|
||||
} else if (i == 2) {
|
||||
/* task state */
|
||||
g_string_assign(buf, "R "); /* we are running right now */
|
||||
} else if (i == 3) {
|
||||
/* ppid */
|
||||
g_string_printf(buf, FMT_pid " ", getppid());
|
||||
@@ -8206,7 +8209,8 @@ void target_exception_dump(CPUArchState *env, const char *fmt, int code)
|
||||
}
|
||||
|
||||
#if HOST_BIG_ENDIAN != TARGET_BIG_ENDIAN || \
|
||||
defined(TARGET_SPARC) || defined(TARGET_M68K) || defined(TARGET_HPPA)
|
||||
defined(TARGET_SPARC) || defined(TARGET_M68K) || \
|
||||
defined(TARGET_HPPA) || defined(TARGET_ARM)
|
||||
static int is_proc(const char *filename, const char *entry)
|
||||
{
|
||||
return strcmp(filename, entry) == 0;
|
||||
@@ -8278,6 +8282,27 @@ static int open_cpuinfo(CPUArchState *cpu_env, int fd)
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(TARGET_ARM)
|
||||
static int open_cpuinfo(CPUArchState *cpu_env, int fd)
|
||||
{
|
||||
dprintf(fd,
|
||||
"Processor : ARMv7 Processor rev 5 (v7l)\n"
|
||||
"BogoMIPS : 799.53\n"
|
||||
"Features : swp half thumb fastmult vfp edsp thumbee neon vfpv3\n"
|
||||
"CPU implementer : 0x41\n"
|
||||
"CPU architecture: 7\n"
|
||||
"CPU variant : 0x2\n"
|
||||
"CPU part : 0xc08\n"
|
||||
"CPU revision : 5\n"
|
||||
"\n"
|
||||
"Hardware : Genesi Efika MX (Smarttop)\n"
|
||||
"Revision : 51030\n"
|
||||
"Serial : 0000000000000000\n");
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(TARGET_M68K)
|
||||
static int open_hardware(CPUArchState *cpu_env, int fd)
|
||||
{
|
||||
@@ -8302,7 +8327,7 @@ static int do_openat(CPUArchState *cpu_env, int dirfd, const char *pathname, int
|
||||
#if HOST_BIG_ENDIAN != TARGET_BIG_ENDIAN
|
||||
{ "/proc/net/route", open_net_route, is_proc },
|
||||
#endif
|
||||
#if defined(TARGET_SPARC) || defined(TARGET_HPPA)
|
||||
#if defined(TARGET_SPARC) || defined(TARGET_HPPA) || defined(TARGET_ARM)
|
||||
{ "/proc/cpuinfo", open_cpuinfo, is_proc },
|
||||
#endif
|
||||
#if defined(TARGET_M68K)
|
||||
@@ -8607,10 +8632,10 @@ _syscall2(int, pivot_root, const char *, new_root, const char *, put_old)
|
||||
* of syscall results, can be performed.
|
||||
* All errnos that do_syscall() returns must be -TARGET_<errcode>.
|
||||
*/
|
||||
static abi_long do_syscall1(CPUArchState *cpu_env, int num, abi_long arg1,
|
||||
abi_long arg2, abi_long arg3, abi_long arg4,
|
||||
abi_long arg5, abi_long arg6, abi_long arg7,
|
||||
abi_long arg8)
|
||||
static abi_long do_syscall1(CPUArchState *cpu_env, int num, abi_ulong arg1,
|
||||
abi_ulong arg2, abi_ulong arg3, abi_ulong arg4,
|
||||
abi_ulong arg5, abi_ulong arg6, abi_ulong arg7,
|
||||
abi_ulong arg8)
|
||||
{
|
||||
CPUState *cpu = env_cpu(cpu_env);
|
||||
abi_long ret;
|
||||
@@ -9007,8 +9032,13 @@ static abi_long do_syscall1(CPUArchState *cpu_env, int num, abi_long arg1,
|
||||
return ret;
|
||||
#endif
|
||||
#ifdef TARGET_NR_lseek
|
||||
case TARGET_NR_lseek:
|
||||
return get_errno(lseek(arg1, arg2, arg3));
|
||||
case TARGET_NR_lseek: {
|
||||
off_t off = arg2;
|
||||
if (arg3 != SEEK_SET) {
|
||||
off = (abi_long)arg2;
|
||||
}
|
||||
return get_errno(lseek(arg1, off, arg3));
|
||||
}
|
||||
#endif
|
||||
#if defined(TARGET_NR_getxpid) && defined(TARGET_ALPHA)
|
||||
/* Alpha specific */
|
||||
@@ -13297,10 +13327,10 @@ static abi_long do_syscall1(CPUArchState *cpu_env, int num, abi_long arg1,
|
||||
return ret;
|
||||
}
|
||||
|
||||
abi_long do_syscall(CPUArchState *cpu_env, int num, abi_long arg1,
|
||||
abi_long arg2, abi_long arg3, abi_long arg4,
|
||||
abi_long arg5, abi_long arg6, abi_long arg7,
|
||||
abi_long arg8)
|
||||
abi_long do_syscall(CPUArchState *cpu_env, int num, abi_ulong arg1,
|
||||
abi_ulong arg2, abi_ulong arg3, abi_ulong arg4,
|
||||
abi_ulong arg5, abi_ulong arg6, abi_ulong arg7,
|
||||
abi_ulong arg8)
|
||||
{
|
||||
CPUState *cpu = env_cpu(cpu_env);
|
||||
abi_long ret;
|
||||
|
@@ -59,10 +59,10 @@ int info_is_fdpic(struct image_info *info);
|
||||
|
||||
void target_set_brk(abi_ulong new_brk);
|
||||
void syscall_init(void);
|
||||
abi_long do_syscall(CPUArchState *cpu_env, int num, abi_long arg1,
|
||||
abi_long arg2, abi_long arg3, abi_long arg4,
|
||||
abi_long arg5, abi_long arg6, abi_long arg7,
|
||||
abi_long arg8);
|
||||
abi_long do_syscall(CPUArchState *cpu_env, int num, abi_ulong arg1,
|
||||
abi_ulong arg2, abi_ulong arg3, abi_ulong arg4,
|
||||
abi_ulong arg5, abi_ulong arg6, abi_ulong arg7,
|
||||
abi_ulong arg8);
|
||||
extern __thread CPUState *thread_cpu;
|
||||
G_NORETURN void cpu_loop(CPUArchState *env);
|
||||
abi_long get_errno(abi_long ret);
|
||||
|
@@ -364,10 +364,15 @@
|
||||
440 common process_madvise sys_process_madvise
|
||||
441 common epoll_pwait2 sys_epoll_pwait2
|
||||
442 common mount_setattr sys_mount_setattr
|
||||
# 443 reserved for quotactl_path
|
||||
443 common quotactl_fd sys_quotactl_fd
|
||||
444 common landlock_create_ruleset sys_landlock_create_ruleset
|
||||
445 common landlock_add_rule sys_landlock_add_rule
|
||||
446 common landlock_restrict_self sys_landlock_restrict_self
|
||||
447 common memfd_secret sys_memfd_secret
|
||||
448 common process_mrelease sys_process_mrelease
|
||||
449 common futex_waitv sys_futex_waitv
|
||||
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
|
||||
451 common memfd_restricted sys_memfd_restricted
|
||||
|
||||
#
|
||||
# Due to a historical design error, certain syscalls are numbered differently
|
||||
@@ -396,7 +401,7 @@
|
||||
530 x32 set_robust_list compat_sys_set_robust_list
|
||||
531 x32 get_robust_list compat_sys_get_robust_list
|
||||
532 x32 vmsplice sys_vmsplice
|
||||
533 x32 move_pages compat_sys_move_pages
|
||||
533 x32 move_pages sys_move_pages
|
||||
534 x32 preadv compat_sys_preadv64
|
||||
535 x32 pwritev compat_sys_pwritev64
|
||||
536 x32 rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo
|
||||
|
@@ -1797,7 +1797,7 @@ config_host_data.set_quoted('CONFIG_SYSCONFDIR', get_option('prefix') / get_opti
|
||||
if config_host.has_key('CONFIG_MODULES')
|
||||
config_host_data.set('CONFIG_STAMP', run_command(
|
||||
meson.current_source_dir() / 'scripts/qemu-stamp.py',
|
||||
meson.project_version(), get_option('pkgversion'), '--',
|
||||
meson.project_version(), '--',
|
||||
meson.current_source_dir() / 'configure',
|
||||
capture: true, check: true).stdout().strip())
|
||||
endif
|
||||
|
718
migration/cgs-tdx.c
Normal file
718
migration/cgs-tdx.c
Normal file
@@ -0,0 +1,718 @@
|
||||
/*
|
||||
* QEMU Migration for Intel TDX Guests
|
||||
*
|
||||
* Copyright (C) 2022 Intel Corp.
|
||||
*
|
||||
* Authors:
|
||||
* Wei Wang <wei.w.wang@intel.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2 or later.
|
||||
* See the COPYING file in the top-level directory.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "qemu-file.h"
|
||||
#include "cgs.h"
|
||||
#include "target/i386/kvm/tdx.h"
|
||||
#include "migration/misc.h"
|
||||
#include "qemu/error-report.h"
|
||||
|
||||
/* MBMD, gpa_list and 2 pages of mac_list */
|
||||
#define MULTIFD_EXTRA_IOV_NUM 4
|
||||
|
||||
/* Bytes of the MBMD for memory page, calculated from the spec */
|
||||
#define TDX_MBMD_MEM_BYTES 48
|
||||
|
||||
#define KVM_TDX_MIG_MBMD_TYPE_IMMUTABLE_STATE 0
|
||||
#define KVM_TDX_MIG_MBMD_TYPE_TD_STATE 1
|
||||
#define KVM_TDX_MIG_MBMD_TYPE_VCPU_STATE 2
|
||||
#define KVM_TDX_MIG_MBMD_TYPE_MEMORY_STATE 16
|
||||
#define KVM_TDX_MIG_MBMD_TYPE_EPOCH_TOKEN 32
|
||||
#define KVM_TDX_MIG_MBMD_TYPE_ABORT_TOKEN 33
|
||||
|
||||
#define GPA_LIST_OP_EXPORT 1
|
||||
#define GPA_LIST_OP_CANCEL 2
|
||||
|
||||
#define TDX_MIG_F_CONTINUE 0x1
|
||||
|
||||
typedef struct TdxMigHdr {
|
||||
uint16_t flags;
|
||||
uint16_t buf_list_num;
|
||||
} TdxMigHdr;
|
||||
|
||||
typedef union GpaListEntry {
|
||||
uint64_t val;
|
||||
struct {
|
||||
uint64_t level:2;
|
||||
uint64_t pending:1;
|
||||
uint64_t reserved_0:4;
|
||||
uint64_t l2_map:3;
|
||||
#define GPA_LIST_ENTRY_MIG_TYPE_4KB 0
|
||||
uint64_t mig_type:2;
|
||||
uint64_t gfn:40;
|
||||
uint64_t operation:2;
|
||||
uint64_t reserved_1:2;
|
||||
uint64_t status:5;
|
||||
uint64_t reserved_2:3;
|
||||
};
|
||||
} GpaListEntry;
|
||||
|
||||
typedef struct TdxMigStream {
|
||||
int fd;
|
||||
void *mbmd;
|
||||
void *buf_list;
|
||||
void *mac_list;
|
||||
void *gpa_list;
|
||||
} TdxMigStream;
|
||||
|
||||
typedef struct TdxMigState {
|
||||
uint32_t nr_streams;
|
||||
TdxMigStream *streams;
|
||||
} TdxMigState;
|
||||
|
||||
TdxMigState tdx_mig;
|
||||
|
||||
static int tdx_mig_stream_ioctl(TdxMigStream *stream, int cmd_id,
|
||||
__u32 metadata, void *data)
|
||||
{
|
||||
struct kvm_tdx_cmd tdx_cmd;
|
||||
int ret;
|
||||
|
||||
memset(&tdx_cmd, 0x0, sizeof(tdx_cmd));
|
||||
|
||||
tdx_cmd.id = cmd_id;
|
||||
tdx_cmd.flags = metadata;
|
||||
tdx_cmd.data = (__u64)(unsigned long)data;
|
||||
|
||||
ret = kvm_device_ioctl(stream->fd, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd);
|
||||
if (ret) {
|
||||
error_report("Failed to send migration cmd %d to the driver: %s",
|
||||
cmd_id, strerror(ret));
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Write a TdxMigHdr (flags + number of buffer-list pages that follow) to
 * the migration file. Returns the number of header bytes written.
 */
static uint64_t tdx_mig_put_mig_hdr(QEMUFile *f, uint64_t num, uint16_t flags)
{
    TdxMigHdr hdr;

    hdr.flags = flags;
    hdr.buf_list_num = (uint16_t)num;
    qemu_put_buffer(f, (uint8_t *)&hdr, sizeof(hdr));

    return sizeof(hdr);
}
|
||||
|
||||
/* Size in bytes of the MBMD data currently held in the stream's MBMD buffer. */
static inline uint64_t tdx_mig_stream_get_mbmd_bytes(TdxMigStream *stream)
{
    /*
     * Per the TDX module v1.5 ABI spec, the first 16-bit field of the MBMD
     * buffer holds the overall size of the MBMD data.
     */
    const uint16_t *size_field = stream->mbmd;

    return *size_field;
}
|
||||
|
||||
/* Type (KVM_TDX_MIG_MBMD_TYPE_*) of the MBMD currently held in the stream. */
static uint8_t tdx_mig_stream_get_mbmd_type(TdxMigStream *stream)
{
    const uint8_t *mbmd = stream->mbmd;

    /* TDX module v1.5 ABI spec: MB_TYPE lives at byte offset 6 */
    return mbmd[6];
}
|
||||
|
||||
/*
 * Export the TD's immutable states (MBMD + buffer list) and write them to
 * the migration file, preceded by a TdxMigHdr.
 *
 * Returns 0 on success or a negative errno value on failure.
 */
static int tdx_mig_savevm_state_start(QEMUFile *f)
{
    TdxMigStream *stream = &tdx_mig.streams[0];
    uint64_t mbmd_bytes, buf_list_bytes, exported_num = 0;
    int ret;

    /* Export mbmd and buf_list */
    ret = tdx_mig_stream_ioctl(stream, KVM_TDX_MIG_EXPORT_STATE_IMMUTABLE,
                               0, &exported_num);
    if (ret) {
        /* ret is a negative errno; negate it for strerror(). */
        error_report("Failed to export immutable states: %s", strerror(-ret));
        return ret;
    }

    mbmd_bytes = tdx_mig_stream_get_mbmd_bytes(stream);
    /* The kernel reports how many buffer-list pages were filled. */
    buf_list_bytes = exported_num * TARGET_PAGE_SIZE;

    tdx_mig_put_mig_hdr(f, exported_num, 0);
    qemu_put_buffer(f, (uint8_t *)stream->mbmd, mbmd_bytes);
    qemu_put_buffer(f, (uint8_t *)stream->buf_list, buf_list_bytes);

    return 0;
}
|
||||
|
||||
/*
 * Export an epoch token to the migration file. @in_order_done marks the
 * final token that ends the in-order export phase. Returns the number of
 * bytes written, or a negative errno value on failure.
 */
static long tdx_mig_save_epoch(QEMUFile *f, bool in_order_done)
{
    TdxMigStream *stream = &tdx_mig.streams[0];
    uint64_t flags = in_order_done ? TDX_MIG_EXPORT_TRACK_F_IN_ORDER_DONE : 0;
    long hdr_bytes, mbmd_bytes;
    int ret;

    ret = tdx_mig_stream_ioctl(stream, KVM_TDX_MIG_EXPORT_TRACK, 0, &flags);
    if (ret) {
        return ret;
    }

    mbmd_bytes = tdx_mig_stream_get_mbmd_bytes(stream);

    /* An epoch token carries only MBMD data (no buffer list). */
    hdr_bytes = tdx_mig_put_mig_hdr(f, 0, 0);
    qemu_put_buffer(f, (uint8_t *)stream->mbmd, mbmd_bytes);

    return hdr_bytes + mbmd_bytes;
}
|
||||
|
||||
/* Start a new RAM migration epoch (not the final in-order-done token). */
static long tdx_mig_savevm_state_ram_start_epoch(QEMUFile *f)
{
    return tdx_mig_save_epoch(f, false);
}
|
||||
|
||||
/*
 * Fill @gpa_list with @gpa_num entries describing 4KB private pages at the
 * given guest physical addresses, tagged with @operation (export/cancel).
 * All other entry fields are zeroed.
 */
static void tdx_mig_gpa_list_setup(union GpaListEntry *gpa_list, hwaddr *gpa,
                                   uint64_t gpa_num, int operation)
{
    uint64_t i;

    for (i = 0; i < gpa_num; i++) {
        gpa_list[i] = (GpaListEntry) {
            .gfn = gpa[i] >> TARGET_PAGE_BITS,
            .mig_type = GPA_LIST_ENTRY_MIG_TYPE_4KB,
            .operation = operation,
        };
    }
}
|
||||
|
||||
/*
 * Export one private page (MBMD, buffer list, GPA list and MAC list) on
 * @stream and write everything to the migration file. The GPA list must
 * already be set up by the caller. Returns the number of bytes written,
 * or a negative errno value on failure.
 */
static long tdx_mig_save_ram(QEMUFile *f, TdxMigStream *stream)
{
    uint64_t nr_pages = 1;
    uint64_t hdr_bytes, mbmd_bytes, gpa_bytes, buf_bytes, mac_bytes;
    int ret;

    /* Export mbmd, buf list, mac list and gpa list */
    ret = tdx_mig_stream_ioctl(stream, KVM_TDX_MIG_EXPORT_MEM, 0, &nr_pages);
    if (ret) {
        return ret;
    }

    mbmd_bytes = tdx_mig_stream_get_mbmd_bytes(stream);
    buf_bytes = TARGET_PAGE_SIZE;       /* a single page is exported */
    mac_bytes = sizeof(Int128);         /* one MAC-list entry */
    gpa_bytes = sizeof(GpaListEntry);   /* one GPA-list entry */

    hdr_bytes = tdx_mig_put_mig_hdr(f, 1, 0);
    qemu_put_buffer(f, (uint8_t *)stream->mbmd, mbmd_bytes);
    qemu_put_buffer(f, (uint8_t *)stream->buf_list, buf_bytes);
    qemu_put_buffer(f, (uint8_t *)stream->gpa_list, gpa_bytes);
    qemu_put_buffer(f, (uint8_t *)stream->mac_list, mac_bytes);

    return hdr_bytes + mbmd_bytes + gpa_bytes + buf_bytes + mac_bytes;
}
|
||||
|
||||
/* Export a single private page at @gpa through stream @channel_id. */
static long tdx_mig_savevm_state_ram(QEMUFile *f,
                                     uint32_t channel_id, hwaddr gpa)
{
    TdxMigStream *stream = &tdx_mig.streams[channel_id];

    tdx_mig_gpa_list_setup((GpaListEntry *)stream->gpa_list, &gpa, 1,
                           GPA_LIST_OP_EXPORT);
    return tdx_mig_save_ram(f, stream);
}
|
||||
|
||||
/* Send a cancellation record for the private page at @gpa (stream 0). */
static long tdx_mig_savevm_state_ram_cancel(QEMUFile *f, hwaddr gpa)
{
    TdxMigStream *stream = &tdx_mig.streams[0];

    tdx_mig_gpa_list_setup((GpaListEntry *)stream->gpa_list, &gpa, 1,
                           GPA_LIST_OP_CANCEL);
    return tdx_mig_save_ram(f, stream);
}
|
||||
|
||||
static int tdx_mig_savevm_state_pause(void)
|
||||
{
|
||||
TdxMigStream *stream = &tdx_mig.streams[0];
|
||||
|
||||
return tdx_mig_stream_ioctl(stream, KVM_TDX_MIG_EXPORT_PAUSE, 0, 0);
|
||||
}
|
||||
|
||||
/*
 * Export the TD-scope (non-vCPU) states and write them to the migration
 * file. Returns 0 on success or a negative errno value on failure.
 */
static int tdx_mig_save_td(QEMUFile *f, TdxMigStream *stream)
{
    uint64_t nr_exported = 0;
    int ret;

    ret = tdx_mig_stream_ioctl(stream, KVM_TDX_MIG_EXPORT_STATE_TD, 0,
                               &nr_exported);
    if (ret) {
        return ret;
    }

    /*
     * The TD-scope states and vCPU states are sent together, so add the
     * CONTINUE flag to have the destination side continue the loading.
     */
    tdx_mig_put_mig_hdr(f, nr_exported, TDX_MIG_F_CONTINUE);
    qemu_put_buffer(f, (uint8_t *)stream->mbmd,
                    tdx_mig_stream_get_mbmd_bytes(stream));
    qemu_put_buffer(f, (uint8_t *)stream->buf_list,
                    nr_exported * TARGET_PAGE_SIZE);

    return 0;
}
|
||||
|
||||
/*
 * Export the state of one vCPU and write it to the migration file.
 * Returns 0 on success or a negative errno value on failure.
 */
static int tdx_mig_save_one_vcpu(QEMUFile *f, TdxMigStream *stream)
{
    uint64_t nr_exported = 0;
    int ret;

    ret = tdx_mig_stream_ioctl(stream, KVM_TDX_MIG_EXPORT_STATE_VP, 0,
                               &nr_exported);
    if (ret) {
        return ret;
    }

    /* Ask the destination to continue to load the next vCPU states */
    tdx_mig_put_mig_hdr(f, nr_exported, TDX_MIG_F_CONTINUE);
    qemu_put_buffer(f, (uint8_t *)stream->mbmd,
                    tdx_mig_stream_get_mbmd_bytes(stream));
    qemu_put_buffer(f, (uint8_t *)stream->buf_list,
                    nr_exported * TARGET_PAGE_SIZE);

    return 0;
}
|
||||
|
||||
/* Export the states of all vCPUs, one per iteration, in CPU-list order. */
static int tdx_mig_save_vcpus(QEMUFile *f, TdxMigStream *stream)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        int ret = tdx_mig_save_one_vcpu(f, stream);

        if (ret) {
            return ret;
        }
    }

    return 0;
}
|
||||
|
||||
/*
 * Final phase of the export: TD-scope states, then all vCPU states, then
 * the closing in-order-done epoch token. Returns 0 on success or a
 * negative errno value on the first failure.
 */
static int tdx_mig_savevm_state_end(QEMUFile *f)
{
    TdxMigStream *stream = &tdx_mig.streams[0];
    long epoch_ret;
    int ret;

    ret = tdx_mig_save_td(f, stream);
    if (!ret) {
        ret = tdx_mig_save_vcpus(f, stream);
    }
    if (ret) {
        return ret;
    }

    /* tdx_mig_save_epoch() returns bytes written (>= 0) or -errno. */
    epoch_ret = tdx_mig_save_epoch(f, true);
    if (epoch_ret < 0) {
        return epoch_ret;
    }

    return 0;
}
|
||||
|
||||
/* Migration may only start once TD pre-migration has completed. */
static bool tdx_mig_is_ready(void)
{
    return tdx_premig_is_done();
}
|
||||
|
||||
/*
 * Create the per-stream KVM migration device and record its fd.
 * Returns 0 on success or the negative value from kvm_create_device().
 */
static int tdx_mig_stream_create(TdxMigStream *stream)
{
    int fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_TDX_MIG_STREAM, false);

    if (fd < 0) {
        error_report("Failed to create stream due to %s", strerror(errno));
        return fd;
    }

    stream->fd = fd;
    return 0;
}
|
||||
|
||||
/*
 * Set up one migration stream: create the KVM device, negotiate the
 * buffer-list size with the tdx_mig driver, and mmap the four shared areas
 * (mbmd, gpa_list, mac_list, buf_list) the driver exposes.
 *
 * Returns 0 on success or a negative errno value on failure.
 *
 * NOTE(review): on a failure after the first successful mmap, the earlier
 * mappings and the device fd are not torn down here — verify that callers
 * run the cleanup path in that case.
 */
static int tdx_mig_do_stream_setup(TdxMigStream *stream, uint32_t nr_pages)
{
    int ret;
    struct kvm_dev_tdx_mig_attr tdx_mig_attr;
    struct kvm_device_attr attr = {
        .group = KVM_DEV_TDX_MIG_ATTR,
        .addr = (uint64_t)&tdx_mig_attr,
        .attr = sizeof(struct kvm_dev_tdx_mig_attr),
    };
    size_t map_size;
    off_t map_offset;

    ret = tdx_mig_stream_create(stream);
    if (ret) {
        return ret;
    }

    /*
     * Tell the tdx_mig driver the number of pages to add to buffer list for
     * TD private page export/import. Currently, TD private pages are migrated
     * one by one.
     *
     * Zero the whole struct first so that fields other than buf_list_pages
     * and version are not handed to the kernel as stack garbage.
     */
    memset(&tdx_mig_attr, 0, sizeof(struct kvm_dev_tdx_mig_attr));
    tdx_mig_attr.buf_list_pages = nr_pages;
    tdx_mig_attr.version = KVM_DEV_TDX_MIG_ATTR_VERSION;
    if (kvm_device_ioctl(stream->fd, KVM_SET_DEVICE_ATTR, &attr) < 0) {
        return -EIO;
    }

    /* Read back the attributes the kernel actually settled on. */
    memset(&tdx_mig_attr, 0, sizeof(struct kvm_dev_tdx_mig_attr));
    tdx_mig_attr.version = KVM_DEV_TDX_MIG_ATTR_VERSION;
    if (kvm_device_ioctl(stream->fd, KVM_GET_DEVICE_ATTR, &attr) < 0) {
        return -EIO;
    }

    /*
     * The *_MAP_OFFSET constants are in page granules; scale by
     * TARGET_PAGE_SIZE consistently for every mapping (the mbmd offset is
     * expected to be 0, so the scaling is a no-op there in practice).
     */
    map_offset = TDX_MIG_STREAM_MBMD_MAP_OFFSET * TARGET_PAGE_SIZE;
    map_size = (TDX_MIG_STREAM_GPA_LIST_MAP_OFFSET -
                TDX_MIG_STREAM_MBMD_MAP_OFFSET) * TARGET_PAGE_SIZE;
    stream->mbmd = mmap(NULL, map_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                        stream->fd, map_offset);
    if (stream->mbmd == MAP_FAILED) {
        ret = -errno;
        error_report("Failed to map mbmd due to %s", strerror(-ret));
        return ret;
    }

    map_offset = TDX_MIG_STREAM_GPA_LIST_MAP_OFFSET * TARGET_PAGE_SIZE;
    map_size = (TDX_MIG_STREAM_MAC_LIST_MAP_OFFSET -
                TDX_MIG_STREAM_GPA_LIST_MAP_OFFSET) * TARGET_PAGE_SIZE;
    stream->gpa_list = mmap(NULL, map_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                            stream->fd, map_offset);
    if (stream->gpa_list == MAP_FAILED) {
        ret = -errno;
        error_report("Failed to map gpa list due to %s", strerror(-ret));
        return ret;
    }

    map_offset = TDX_MIG_STREAM_MAC_LIST_MAP_OFFSET * TARGET_PAGE_SIZE;
    map_size = (TDX_MIG_STREAM_BUF_LIST_MAP_OFFSET -
                TDX_MIG_STREAM_MAC_LIST_MAP_OFFSET) * TARGET_PAGE_SIZE;
    stream->mac_list = mmap(NULL, map_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                            stream->fd, map_offset);
    if (stream->mac_list == MAP_FAILED) {
        ret = -errno;
        error_report("Failed to map mac list due to %s", strerror(-ret));
        return ret;
    }

    /* The buffer-list size comes from the attributes read back above. */
    map_offset = TDX_MIG_STREAM_BUF_LIST_MAP_OFFSET * TARGET_PAGE_SIZE;
    map_size = tdx_mig_attr.buf_list_pages * TARGET_PAGE_SIZE;
    stream->buf_list = mmap(NULL, map_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                            stream->fd, map_offset);
    if (stream->buf_list == MAP_FAILED) {
        ret = -errno;
        error_report("Failed to map buf list due to %s", strerror(-ret));
        return ret;
    }

    return 0;
}
|
||||
|
||||
static int tdx_mig_stream_setup(uint32_t nr_channels, uint32_t nr_pages)
|
||||
{
|
||||
TdxMigStream *stream;
|
||||
int i, ret;
|
||||
|
||||
tdx_mig.streams = g_malloc0(sizeof(struct TdxMigStream) * nr_channels);
|
||||
|
||||
for (i = 0; i < nr_channels; i++) {
|
||||
stream = &tdx_mig.streams[i];
|
||||
ret = tdx_mig_do_stream_setup(stream, nr_pages);
|
||||
if (!ret) {
|
||||
tdx_mig.nr_streams++;
|
||||
} else {
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Undo tdx_mig_do_stream_setup() for one stream: unmap the four shared
 * areas and close the device fd. The buffer-list size is re-read from the
 * device because it is not cached in TdxMigStream.
 */
static void tdx_mig_stream_cleanup(TdxMigStream *stream)
{
    struct kvm_dev_tdx_mig_attr tdx_mig_attr;
    struct kvm_device_attr attr = {
        .group = KVM_DEV_TDX_MIG_ATTR,
        .addr = (uint64_t)&tdx_mig_attr,
        .attr = sizeof(struct kvm_dev_tdx_mig_attr),
    };
    size_t unmap_size;
    int ret;

    memset(&tdx_mig_attr, 0, sizeof(struct kvm_dev_tdx_mig_attr));
    ret = kvm_device_ioctl(stream->fd, KVM_GET_DEVICE_ATTR, &attr);
    if (ret < 0) {
        /* ret is a negative errno; negate it for strerror(). */
        error_report("tdx mig cleanup failed: %s", strerror(-ret));
        return;
    }

    unmap_size = (TDX_MIG_STREAM_GPA_LIST_MAP_OFFSET -
                  TDX_MIG_STREAM_MBMD_MAP_OFFSET) * TARGET_PAGE_SIZE;
    munmap(stream->mbmd, unmap_size);

    unmap_size = (TDX_MIG_STREAM_MAC_LIST_MAP_OFFSET -
                  TDX_MIG_STREAM_GPA_LIST_MAP_OFFSET) * TARGET_PAGE_SIZE;
    munmap(stream->gpa_list, unmap_size);

    unmap_size = (TDX_MIG_STREAM_BUF_LIST_MAP_OFFSET -
                  TDX_MIG_STREAM_MAC_LIST_MAP_OFFSET) * TARGET_PAGE_SIZE;
    munmap(stream->mac_list, unmap_size);

    unmap_size = tdx_mig_attr.buf_list_pages * TARGET_PAGE_SIZE;
    munmap(stream->buf_list, unmap_size);
    close(stream->fd);
}
|
||||
|
||||
static void tdx_mig_cleanup(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < tdx_mig.nr_streams; i++) {
|
||||
tdx_mig_stream_cleanup(&tdx_mig.streams[i]);
|
||||
}
|
||||
|
||||
tdx_mig.nr_streams = 0;
|
||||
|
||||
g_free(tdx_mig.streams);
|
||||
tdx_mig.streams = NULL;
|
||||
}
|
||||
|
||||
static void tdx_mig_loadvm_state_cleanup(void)
|
||||
{
|
||||
TdxMigStream *stream = &tdx_mig.streams[0];
|
||||
|
||||
tdx_mig_stream_ioctl(stream, KVM_TDX_MIG_IMPORT_END, 0, 0);
|
||||
tdx_mig_cleanup();
|
||||
}
|
||||
|
||||
/*
 * Abort an in-progress RAM export. @gfn_end is the last private page that
 * was migrated; 0 means no page was exported, so there is nothing to abort.
 */
static int tdx_mig_savevm_state_ram_abort(hwaddr gfn_end)
{
    TdxMigStream *stream = &tdx_mig.streams[0];

    if (!gfn_end) {
        return 0;
    }

    return tdx_mig_stream_ioctl(stream, KVM_TDX_MIG_EXPORT_ABORT, 0,
                                &gfn_end);
}
|
||||
|
||||
/*
 * Load TDX migration data batches from channel @channel_id and import them
 * via the per-stream migration device. Each batch is a TdxMigHdr followed
 * by MBMD data, an optional buffer list, and (for memory batches) GPA and
 * MAC lists. Looping continues while the header carries TDX_MIG_F_CONTINUE.
 *
 * Returns 0 on success, a negative errno from the import ioctl, or -1 for
 * an unknown MBMD type.
 */
static int tdx_mig_loadvm_state(QEMUFile *f, uint32_t channel_id)
{
    TdxMigStream *stream = &tdx_mig.streams[channel_id];
    uint64_t mbmd_bytes, buf_list_bytes, mac_list_bytes, gpa_list_bytes;
    uint64_t buf_list_num = 0;
    bool should_continue = true;
    uint8_t mbmd_type;
    int ret, cmd_id;
    TdxMigHdr hdr;

    while (should_continue) {
        /*
         * Peek past the header at the 16-bit MBMD size field; 0 means the
         * MBMD data hasn't arrived yet, so poll again.
         * NOTE(review): this busy-polls the file — confirm qemu_peek_le16()
         * blocks/refills as expected on this channel type.
         */
        if (should_continue && qemu_peek_le16(f, sizeof(hdr)) == 0) {
            continue;
        }

        /* Header, then the MBMD whose first field is its own size. */
        qemu_get_buffer(f, (uint8_t *)&hdr, sizeof(hdr));
        mbmd_bytes = qemu_peek_le16(f, 0);
        qemu_get_buffer(f, (uint8_t *)stream->mbmd, mbmd_bytes);
        mbmd_type = tdx_mig_stream_get_mbmd_type(stream);

        /* The buffer list is present only when the header says so. */
        buf_list_num = hdr.buf_list_num;
        buf_list_bytes = buf_list_num * TARGET_PAGE_SIZE;
        if (buf_list_num) {
            qemu_get_buffer(f, (uint8_t *)stream->buf_list, buf_list_bytes);
        }

        /* Map the MBMD type to the matching import command. */
        switch (mbmd_type) {
        case KVM_TDX_MIG_MBMD_TYPE_IMMUTABLE_STATE:
            cmd_id = KVM_TDX_MIG_IMPORT_STATE_IMMUTABLE;
            break;
        case KVM_TDX_MIG_MBMD_TYPE_MEMORY_STATE:
            cmd_id = KVM_TDX_MIG_IMPORT_MEM;
            /* Memory batches additionally carry GPA and MAC lists. */
            mac_list_bytes = buf_list_num * sizeof(Int128);
            gpa_list_bytes = buf_list_num * sizeof(GpaListEntry);
            qemu_get_buffer(f, (uint8_t *)stream->gpa_list, gpa_list_bytes);
            qemu_get_buffer(f, (uint8_t *)stream->mac_list, mac_list_bytes);
            break;
        case KVM_TDX_MIG_MBMD_TYPE_EPOCH_TOKEN:
            cmd_id = KVM_TDX_MIG_IMPORT_TRACK;
            break;
        case KVM_TDX_MIG_MBMD_TYPE_TD_STATE:
            cmd_id = KVM_TDX_MIG_IMPORT_STATE_TD;
            break;
        case KVM_TDX_MIG_MBMD_TYPE_VCPU_STATE:
            cmd_id = KVM_TDX_MIG_IMPORT_STATE_VP;
            break;
        default:
            error_report("%s: unsupported mb_type %d", __func__, mbmd_type);
            return -1;
        }

        ret = tdx_mig_stream_ioctl(stream, cmd_id, 0, &buf_list_num);

        if (ret) {
            /* The kernel may report how many buffers were left unimported. */
            if (buf_list_num != 0) {
                error_report("%s: buf_list_num=%lx", __func__, buf_list_num);
            }
            break;
        }
        should_continue = hdr.flags & TDX_MIG_F_CONTINUE;
    }

    return ret;
}
|
||||
|
||||
static uint32_t tdx_mig_iov_num(uint32_t page_batch_num)
|
||||
{
|
||||
uint32_t iov_num;
|
||||
|
||||
/* The TDX migration architecture supports up to 512 pages */
|
||||
if (page_batch_num > 512) {
|
||||
error_report("%u is larger than the max (512)", page_batch_num);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (page_batch_num > 256) {
|
||||
iov_num = MULTIFD_EXTRA_IOV_NUM + page_batch_num;
|
||||
} else {
|
||||
/* One less MAC page is used */
|
||||
iov_num = MULTIFD_EXTRA_IOV_NUM - 1 + page_batch_num;
|
||||
}
|
||||
|
||||
return iov_num;
|
||||
}
|
||||
|
||||
/*
 * Multifd send hook: export the batch of private pages queued on channel
 * @p->id and append the exported data (MBMD, GPA list, MAC list, buffer
 * list) to the channel's iov array. Updates p->iovs_num and
 * p->next_packet_size. Returns 0 on success or a negative errno value.
 */
static int tdx_mig_multifd_send_prepare(MultiFDSendParams *p, Error **errp)
{
    TdxMigStream *stream = &tdx_mig.streams[p->id];
    MultiFDPages_t *pages = p->pages;
    uint32_t i, iovs_num = p->iovs_num, packet_size = 0;
    uint64_t page_num = pages->num;
    int ret;

    tdx_mig_gpa_list_setup((GpaListEntry *)stream->gpa_list,
                           pages->private_gpa, page_num, GPA_LIST_OP_EXPORT);

    ret = tdx_mig_stream_ioctl(stream, KVM_TDX_MIG_EXPORT_MEM, 0, &page_num);
    if (ret) {
        return ret;
    }
    /* The ioctl may modify page_num; restore the batch size. */
    page_num = pages->num;

    /* MBMD */
    p->iov[iovs_num].iov_base = stream->mbmd;
    p->iov[iovs_num++].iov_len = TDX_MBMD_MEM_BYTES;
    packet_size = TDX_MBMD_MEM_BYTES;

    /* GPA list */
    p->iov[iovs_num].iov_base = stream->gpa_list;
    p->iov[iovs_num].iov_len = sizeof(GpaListEntry) * page_num;
    packet_size += p->iov[iovs_num++].iov_len;

    /* TODO: check if there is a 2nd MAC list */
    /* MAC list */
    p->iov[iovs_num].iov_base = stream->mac_list;
    p->iov[iovs_num].iov_len = sizeof(Int128) * page_num;
    packet_size += p->iov[iovs_num++].iov_len;

    /* Buffer list: one page-sized entry per exported page. */
    for (i = 0; i < page_num; i++) {
        p->iov[iovs_num].iov_base = stream->buf_list + TARGET_PAGE_SIZE * i;
        p->iov[iovs_num].iov_len = TARGET_PAGE_SIZE;
        packet_size += p->iov[iovs_num++].iov_len;
    }

    p->iovs_num = iovs_num;
    p->next_packet_size = packet_size;
    return 0;
}
|
||||
|
||||
/*
 * Multifd receive hook: read one exported batch (MBMD, GPA list, MAC list,
 * buffer list) from channel @p->id into the stream's shared areas and
 * import it into the TD. Returns 0 on success or a negative value.
 */
static int tdx_mig_multifd_recv_pages(MultiFDRecvParams *p, Error **errp)
{
    TdxMigStream *stream = &tdx_mig.streams[p->id];
    uint32_t i, iovs_num = 0;
    uint64_t gfn_num = p->normal_num;
    uint8_t mbmd_type;
    int ret;

    /* MBMD */
    p->iov[iovs_num].iov_base = stream->mbmd;
    p->iov[iovs_num++].iov_len = TDX_MBMD_MEM_BYTES;

    /* GPA list */
    p->iov[iovs_num].iov_base = stream->gpa_list;
    p->iov[iovs_num++].iov_len = sizeof(GpaListEntry) * gfn_num;

    /* MAC list */
    p->iov[iovs_num].iov_base = stream->mac_list;
    p->iov[iovs_num++].iov_len = sizeof(Int128) * gfn_num;

    /* Buffer list */
    for (i = 0; i < gfn_num; i++) {
        p->iov[iovs_num].iov_base = stream->buf_list + TARGET_PAGE_SIZE * i;
        p->iov[iovs_num++].iov_len = TARGET_PAGE_SIZE;
    }

    ret = qio_channel_readv_all(p->c, p->iov, iovs_num, errp);
    if (ret) {
        /*
         * error_report() appends the newline itself, so the messages below
         * carry no trailing "\n" (they previously did).
         */
        error_report("%s: channel read: %s", __func__, strerror(ret));
        return ret;
    }

    /* Only memory-state batches are expected on multifd channels. */
    mbmd_type = tdx_mig_stream_get_mbmd_type(stream);
    if (mbmd_type != KVM_TDX_MIG_MBMD_TYPE_MEMORY_STATE) {
        error_report("%s: packet received wrong, mbmd=%d", __func__, mbmd_type);
        return -EINVAL;
    }

    ret = tdx_mig_stream_ioctl(stream, KVM_TDX_MIG_IMPORT_MEM, 0, &gfn_num);
    if (ret) {
        error_report("%s failed: %s, gfn_num=%lu",
                     __func__, strerror(ret), gfn_num);
    }

    return ret;
}
|
||||
|
||||
/*
 * Register the TDX implementations of the confidential-guest migration
 * hooks into @cgs_mig.
 */
void tdx_mig_init(CgsMig *cgs_mig)
{
    /* Save (source) side */
    cgs_mig->is_ready = tdx_mig_is_ready;
    cgs_mig->savevm_state_setup = tdx_mig_stream_setup;
    cgs_mig->savevm_state_start = tdx_mig_savevm_state_start;
    cgs_mig->savevm_state_ram_start_epoch =
                                tdx_mig_savevm_state_ram_start_epoch;
    cgs_mig->savevm_state_ram = tdx_mig_savevm_state_ram;
    cgs_mig->savevm_state_pause = tdx_mig_savevm_state_pause;
    cgs_mig->savevm_state_end = tdx_mig_savevm_state_end;
    cgs_mig->savevm_state_cleanup = tdx_mig_cleanup;
    cgs_mig->savevm_state_ram_abort = tdx_mig_savevm_state_ram_abort;
    cgs_mig->savevm_state_ram_cancel = tdx_mig_savevm_state_ram_cancel;

    /* Load (destination) side */
    cgs_mig->loadvm_state_setup = tdx_mig_stream_setup;
    cgs_mig->loadvm_state = tdx_mig_loadvm_state;
    cgs_mig->loadvm_state_cleanup = tdx_mig_loadvm_state_cleanup;

    /* Multifd support */
    cgs_mig->multifd_send_prepare = tdx_mig_multifd_send_prepare;
    cgs_mig->multifd_recv_pages = tdx_mig_multifd_recv_pages;
    cgs_mig->iov_num = tdx_mig_iov_num;
}
|
290
migration/cgs.c
Normal file
290
migration/cgs.c
Normal file
@@ -0,0 +1,290 @@
|
||||
/*
|
||||
* QEMU Migration for Confidential Guest Support
|
||||
*
|
||||
* Copyright (C) 2022 Intel Corp.
|
||||
*
|
||||
* Authors:
|
||||
* Wei Wang <wei.w.wang@intel.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2 or later.
|
||||
* See the COPYING file in the top-level directory.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "qemu/error-report.h"
|
||||
#include "qemu-file.h"
|
||||
#include "sysemu/kvm.h"
|
||||
#include "savevm.h"
|
||||
#include "ram.h"
|
||||
#include "cgs.h"
|
||||
|
||||
static CgsMig cgs_mig;
|
||||
|
||||
#define cgs_check_error(f, ret) \
|
||||
do { \
|
||||
if (ret < 0) { \
|
||||
error_report("%s: failed: %s", __func__, strerror(ret)); \
|
||||
qemu_file_set_error(f, ret); \
|
||||
return ret; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
bool cgs_mig_is_ready(void)
|
||||
{
|
||||
/*
|
||||
* For the legacy VM migration and some vendor specific implementations
|
||||
* that don't require the check, return true to have the migration flow
|
||||
* continue.
|
||||
*/
|
||||
if (!cgs_mig.is_ready) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return cgs_mig.is_ready();
|
||||
}
|
||||
|
||||
/*
 * Set up the vendor-specific save-side state. The channel and batch sizes
 * depend on the configured migration mode (multifd / postcopy-preempt).
 */
int cgs_mig_savevm_state_setup(QEMUFile *f)
{
    uint32_t nr_channels = 1, nr_pages = 1;
    int ret;

    if (!cgs_mig.savevm_state_setup) {
        return 0;
    }

    if (migrate_use_multifd()) {
        nr_channels = migrate_multifd_channels();
        nr_pages = MULTIFD_PACKET_SIZE / TARGET_PAGE_SIZE;
    } else if (migrate_postcopy_preempt()) {
        nr_channels = RAM_CHANNEL_MAX;
    }

    ret = cgs_mig.savevm_state_setup(nr_channels, nr_pages);
    cgs_check_error(f, ret);

    return ret;
}
|
||||
|
||||
/*
 * Emit the CGS start section marker and the vendor-specific start data,
 * then flush so the destination can begin processing right away.
 * Returns 0 on success or the (negative) vendor error.
 */
int cgs_mig_savevm_state_start(QEMUFile *f)
{
    int ret;

    if (!cgs_mig.savevm_state_start) {
        return 0;
    }

    qemu_put_byte(f, QEMU_VM_SECTION_CGS_START);
    ret = cgs_mig.savevm_state_start(f);
    cgs_check_error(f, ret);
    /*
     * Flush the initial message (i.e. QEMU_VM_SECTION_CGS_START + vendor
     * specific data if there is) immediately to have the destination side
     * kick off the process as soon as possible.
     */
    if (!ret) {
        qemu_fflush(f);
    }

    return ret;
}
|
||||
|
||||
/* Return number of bytes sent or the error value (< 0) */
|
||||
/*
 * Start a new RAM migration epoch for the confidential guest.
 * Return number of bytes sent or the error value (< 0).
 */
long cgs_ram_save_start_epoch(QEMUFile *f)
{
    long ret;

    if (!cgs_mig.savevm_state_ram_start_epoch) {
        return 0;
    }

    /* Drain the multifd channels before switching to the new epoch. */
    if (migrate_use_multifd() && !migration_in_postcopy()) {
        ret = multifd_send_sync_main(f);
        if (ret < 0) {
            return ret;
        }
    }

    ram_save_cgs_epoch_header(f);
    ret = cgs_mig.savevm_state_ram_start_epoch(f);
    cgs_check_error(f, ret);

    /*
     * 8 bytes for the cgs header
     * NOTE(review): hard-coded; presumably matches what
     * ram_save_cgs_epoch_header() writes — confirm against its definition.
     */
    return ret + 8;
}
|
||||
|
||||
/* Return number of bytes sent or the error value (< 0) */
|
||||
long cgs_mig_savevm_state_ram(QEMUFile *f, uint32_t channel_id,
|
||||
RAMBlock *block, ram_addr_t offset, hwaddr gpa)
|
||||
{
|
||||
long hdr_bytes, ret;
|
||||
|
||||
if (!cgs_mig.savevm_state_ram) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
hdr_bytes = ram_save_cgs_ram_header(f, block, offset, false);
|
||||
ret = cgs_mig.savevm_state_ram(f, channel_id, gpa);
|
||||
/*
|
||||
* Returning 0 isn't expected. Either succeed with returning bytes of data
|
||||
* written to the file or error with a negative error code returned.
|
||||
*/
|
||||
assert(ret);
|
||||
cgs_check_error(f, ret);
|
||||
|
||||
return hdr_bytes + ret;
|
||||
}
|
||||
|
||||
/* Pause the vendor-specific export; a no-op when no hook is registered. */
int cgs_mig_savevm_state_pause(QEMUFile *f)
{
    int ret = 0;

    if (cgs_mig.savevm_state_pause) {
        ret = cgs_mig.savevm_state_pause();
        cgs_check_error(f, ret);
    }

    return ret;
}
|
||||
|
||||
/* Emit the CGS end section marker followed by the vendor's final states. */
int cgs_mig_savevm_state_end(QEMUFile *f)
{
    int ret = 0;

    if (cgs_mig.savevm_state_end) {
        qemu_put_byte(f, QEMU_VM_SECTION_CGS_END);
        ret = cgs_mig.savevm_state_end(f);
        cgs_check_error(f, ret);
    }

    return ret;
}
|
||||
|
||||
/* gfn_end indicates the last private page that has been migrated. */
|
||||
/* gfn_end indicates the last private page that has been migrated. */
int cgs_mig_savevm_state_ram_abort(QEMUFile *f, hwaddr gfn_end)
{
    int ret = 0;

    if (cgs_mig.savevm_state_ram_abort) {
        ret = cgs_mig.savevm_state_ram_abort(gfn_end);
        cgs_check_error(f, ret);
    }

    return ret;
}
|
||||
|
||||
bool cgs_mig_savevm_state_need_ram_cancel(void)
|
||||
{
|
||||
return !!cgs_mig.savevm_state_ram_cancel;
|
||||
}
|
||||
|
||||
long cgs_mig_savevm_state_ram_cancel(QEMUFile *f, RAMBlock *block,
|
||||
ram_addr_t offset, hwaddr gpa)
|
||||
{
|
||||
long hdr_bytes, ret;
|
||||
|
||||
if (!cgs_mig.savevm_state_ram_cancel) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
hdr_bytes = ram_save_cgs_ram_header(f, block, offset, true);
|
||||
ret = cgs_mig.savevm_state_ram_cancel(f, gpa);
|
||||
cgs_check_error(f, ret);
|
||||
|
||||
return hdr_bytes + ret;
|
||||
}
|
||||
|
||||
void cgs_mig_savevm_state_cleanup(void)
|
||||
{
|
||||
if (cgs_mig.savevm_state_cleanup) {
|
||||
cgs_mig.savevm_state_cleanup();
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Set up the vendor-specific load-side state, mirroring the channel and
 * batch sizing used on the save side.
 */
int cgs_mig_loadvm_state_setup(QEMUFile *f)
{
    uint32_t nr_channels = 1, nr_pages = 1;
    int ret;

    if (!cgs_mig.loadvm_state_setup) {
        return 0;
    }

    if (migrate_use_multifd()) {
        nr_channels = migrate_multifd_channels();
        nr_pages = MULTIFD_PACKET_SIZE / TARGET_PAGE_SIZE;
    } else if (migrate_postcopy_preempt()) {
        nr_channels = RAM_CHANNEL_MAX;
    }

    ret = cgs_mig.loadvm_state_setup(nr_channels, nr_pages);
    cgs_check_error(f, ret);

    return ret;
}
|
||||
|
||||
/* Dispatch incoming CGS migration data on @channel_id to the vendor hook. */
int cgs_mig_loadvm_state(QEMUFile *f, uint32_t channel_id)
{
    int ret = 0;

    if (cgs_mig.loadvm_state) {
        ret = cgs_mig.loadvm_state(f, channel_id);
        cgs_check_error(f, ret);
    }

    return ret;
}
|
||||
|
||||
void cgs_mig_loadvm_state_cleanup(void)
|
||||
{
|
||||
if (cgs_mig.loadvm_state_cleanup) {
|
||||
cgs_mig.loadvm_state_cleanup();
|
||||
}
|
||||
}
|
||||
|
||||
/* Forward multifd send preparation to the vendor hook, if any. */
int cgs_mig_multifd_send_prepare(MultiFDSendParams *p, Error **errp)
{
    return cgs_mig.multifd_send_prepare ?
           cgs_mig.multifd_send_prepare(p, errp) : 0;
}
|
||||
|
||||
/* Forward multifd page reception to the vendor hook, if any. */
int cgs_mig_multifd_recv_pages(MultiFDRecvParams *p, Error **errp)
{
    return cgs_mig.multifd_recv_pages ?
           cgs_mig.multifd_recv_pages(p, errp) : 0;
}
|
||||
|
||||
uint32_t cgs_mig_iov_num(uint32_t page_batch_num)
|
||||
{
|
||||
if (!cgs_mig.iov_num) {
|
||||
return page_batch_num;
|
||||
}
|
||||
|
||||
return cgs_mig.iov_num(page_batch_num);
|
||||
}
|
||||
|
||||
void cgs_mig_init(void)
|
||||
{
|
||||
switch (kvm_vm_type) {
|
||||
case KVM_X86_TDX_VM:
|
||||
tdx_mig_init(&cgs_mig);
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
}
|
65
migration/cgs.h
Normal file
65
migration/cgs.h
Normal file
@@ -0,0 +1,65 @@
|
||||
/*
|
||||
* QEMU Migration for Confidential Guest Support
|
||||
*
|
||||
* Copyright (C) 2022 Intel Corp.
|
||||
*
|
||||
* Authors:
|
||||
* Wei Wang <wei.w.wang@intel.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2 or later.
|
||||
* See the COPYING file in the top-level directory.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef QEMU_MIGRATION_CGS_H
#define QEMU_MIGRATION_CGS_H
#include "qemu/osdep.h"
#include "migration.h"
#include "multifd.h"

/* Sentinel GPA: "no private page" marker. */
#define CGS_PRIVATE_GPA_INVALID (~0UL)

/*
 * Vendor-specific hooks for confidential guest (CGS) migration. Any hook
 * left NULL is treated as "not needed" by the cgs_mig_* wrappers, which
 * then act as no-ops (or report ready, for is_ready).
 */
typedef struct CgsMig {
    /* Whether the guest is ready to migrate (e.g. pre-migration done). */
    bool (*is_ready)(void);
    /* Save-side setup/teardown and state export hooks. */
    int (*savevm_state_setup)(uint32_t nr_channels, uint32_t nr_pages);
    int (*savevm_state_start)(QEMUFile *f);
    long (*savevm_state_ram_start_epoch)(QEMUFile *f);
    long (*savevm_state_ram)(QEMUFile *f, uint32_t channel_id, hwaddr gpa);
    int (*savevm_state_pause)(void);
    int (*savevm_state_end)(QEMUFile *f);
    int (*savevm_state_ram_abort)(hwaddr gfn_end);
    long (*savevm_state_ram_cancel)(QEMUFile *f, hwaddr gpa);
    void (*savevm_state_cleanup)(void);
    /* Load-side setup/teardown and state import hooks. */
    int (*loadvm_state_setup)(uint32_t nr_channels, uint32_t nr_pages);
    int (*loadvm_state)(QEMUFile *f, uint32_t channel_id);
    void (*loadvm_state_cleanup)(void);
    /* Multifd support */
    uint32_t (*iov_num)(uint32_t page_batch_num);
    int (*multifd_send_prepare)(MultiFDSendParams *p, Error **errp);
    int (*multifd_recv_pages)(MultiFDRecvParams *p, Error **errp);
} CgsMig;

/* Generic wrappers around the hooks above; see migration/cgs.c. */
bool cgs_mig_is_ready(void);
int cgs_mig_savevm_state_setup(QEMUFile *f);
int cgs_mig_savevm_state_start(QEMUFile *f);
long cgs_ram_save_start_epoch(QEMUFile *f);
long cgs_mig_savevm_state_ram(QEMUFile *f, uint32_t channel_id,
                              RAMBlock *block, ram_addr_t offset, hwaddr gpa);
bool cgs_mig_savevm_state_need_ram_cancel(void);
long cgs_mig_savevm_state_ram_cancel(QEMUFile *f, RAMBlock *block,
                                     ram_addr_t offset, hwaddr gpa);
int cgs_mig_savevm_state_pause(QEMUFile *f);
int cgs_mig_savevm_state_end(QEMUFile *f);
int cgs_mig_savevm_state_ram_abort(QEMUFile *f, hwaddr gfn_end);
void cgs_mig_savevm_state_cleanup(void);
int cgs_mig_loadvm_state_setup(QEMUFile *f);
int cgs_mig_loadvm_state(QEMUFile *f, uint32_t channel_id);
void cgs_mig_loadvm_state_cleanup(void);
int cgs_mig_multifd_send_prepare(MultiFDSendParams *p, Error **errp);
int cgs_mig_multifd_recv_pages(MultiFDRecvParams *p, Error **errp);
uint32_t cgs_mig_iov_num(uint32_t page_batch_num);
void cgs_mig_init(void);

/* Registers the TDX implementation of the hooks (migration/cgs-tdx.c). */
void tdx_mig_init(CgsMig *cgs_mig);

#endif
|
@@ -34,4 +34,5 @@ endif
|
||||
softmmu_ss.add(when: zstd, if_true: files('multifd-zstd.c'))
|
||||
|
||||
specific_ss.add(when: 'CONFIG_SOFTMMU',
|
||||
if_true: files('dirtyrate.c', 'ram.c', 'target.c'))
|
||||
if_true: files('dirtyrate.c', 'ram.c', 'target.c', 'cgs.c',
|
||||
'cgs-tdx.c'))
|
||||
|
@@ -26,6 +26,7 @@
|
||||
#include "sysemu/cpu-throttle.h"
|
||||
#include "rdma.h"
|
||||
#include "ram.h"
|
||||
#include "cgs.h"
|
||||
#include "migration/global_state.h"
|
||||
#include "migration/misc.h"
|
||||
#include "migration.h"
|
||||
@@ -61,6 +62,7 @@
|
||||
#include "sysemu/cpus.h"
|
||||
#include "yank_functions.h"
|
||||
#include "sysemu/qtest.h"
|
||||
#include "sysemu/kvm.h"
|
||||
|
||||
#define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */
|
||||
|
||||
@@ -129,6 +131,8 @@ enum mig_rp_message_type {
|
||||
MIG_RP_MSG_REQ_PAGES, /* data (start: be64, len: be32) */
|
||||
MIG_RP_MSG_RECV_BITMAP, /* send recved_bitmap back to source */
|
||||
MIG_RP_MSG_RESUME_ACK, /* tell source that we are ready to resume */
|
||||
MIG_RP_MSG_REQ_PRIVATE_PAGES_ID,
|
||||
MIG_RP_MSG_REQ_PRIVATE_PAGES,
|
||||
|
||||
MIG_RP_MSG_MAX
|
||||
};
|
||||
@@ -176,9 +180,8 @@ static MigrationIncomingState *current_incoming;
|
||||
static GSList *migration_blockers;
|
||||
|
||||
static bool migration_object_check(MigrationState *ms, Error **errp);
|
||||
static int migration_maybe_pause(MigrationState *s,
|
||||
int *current_active_state,
|
||||
int new_state);
|
||||
static int migration_pause(MigrationState *s, int *current_active_state,
|
||||
int new_state);
|
||||
static void migrate_fd_cancel(MigrationState *s);
|
||||
|
||||
static bool migrate_allow_multi_channels = true;
|
||||
@@ -202,6 +205,7 @@ static gint page_request_addr_cmp(gconstpointer ap, gconstpointer bp)
|
||||
|
||||
void migration_object_init(void)
|
||||
{
|
||||
int i;
|
||||
/* This can only be called once. */
|
||||
assert(!current_migration);
|
||||
current_migration = MIGRATION_OBJ(object_new(TYPE_MIGRATION));
|
||||
@@ -222,6 +226,15 @@ void migration_object_init(void)
|
||||
qemu_sem_init(¤t_incoming->postcopy_pause_sem_fault, 0);
|
||||
qemu_sem_init(¤t_incoming->postcopy_pause_sem_fast_load, 0);
|
||||
qemu_mutex_init(¤t_incoming->page_request_mutex);
|
||||
qemu_spin_init(¤t_incoming->req_pending_list_lock);
|
||||
qemu_spin_init(¤t_incoming->req_sent_list_lock);
|
||||
QSIMPLEQ_INIT(¤t_incoming->private_fault_req_pending_list);
|
||||
QSIMPLEQ_INIT(¤t_incoming->private_fault_req_sent_list);
|
||||
|
||||
for (i = 0; i < 128; i++) {
|
||||
qemu_sem_init(¤t_incoming->private_fault_req[i].sem, 0);
|
||||
}
|
||||
|
||||
current_incoming->page_requested = g_tree_new(page_request_addr_cmp);
|
||||
|
||||
migration_object_check(current_migration, &error_fatal);
|
||||
@@ -229,6 +242,7 @@ void migration_object_init(void)
|
||||
blk_mig_init();
|
||||
ram_mig_init();
|
||||
dirty_bitmap_mig_init();
|
||||
cgs_mig_init();
|
||||
}
|
||||
|
||||
void migration_cancel(const Error *error)
|
||||
@@ -389,7 +403,8 @@ static int migrate_send_rp_message(MigrationIncomingState *mis,
|
||||
* Len: Length in bytes required - must be a multiple of pagesize
|
||||
*/
|
||||
int migrate_send_rp_message_req_pages(MigrationIncomingState *mis,
|
||||
RAMBlock *rb, ram_addr_t start)
|
||||
RAMBlock *rb, ram_addr_t start,
|
||||
bool is_private)
|
||||
{
|
||||
uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), rbname up to 256 */
|
||||
size_t msglen = 12; /* start + len */
|
||||
@@ -417,9 +432,17 @@ int migrate_send_rp_message_req_pages(MigrationIncomingState *mis,
|
||||
bufc[msglen++] = rbname_len;
|
||||
memcpy(bufc + msglen, rbname, rbname_len);
|
||||
msglen += rbname_len;
|
||||
msg_type = MIG_RP_MSG_REQ_PAGES_ID;
|
||||
if (is_private) {
|
||||
msg_type = MIG_RP_MSG_REQ_PRIVATE_PAGES_ID;
|
||||
} else {
|
||||
msg_type = MIG_RP_MSG_REQ_PAGES_ID;
|
||||
}
|
||||
} else {
|
||||
msg_type = MIG_RP_MSG_REQ_PAGES;
|
||||
if (is_private) {
|
||||
msg_type = MIG_RP_MSG_REQ_PRIVATE_PAGES;
|
||||
} else {
|
||||
msg_type = MIG_RP_MSG_REQ_PAGES;
|
||||
}
|
||||
}
|
||||
|
||||
return migrate_send_rp_message(mis, msg_type, msglen, bufc);
|
||||
@@ -453,7 +476,7 @@ int migrate_send_rp_req_pages(MigrationIncomingState *mis,
|
||||
return 0;
|
||||
}
|
||||
|
||||
return migrate_send_rp_message_req_pages(mis, rb, start);
|
||||
return migrate_send_rp_message_req_pages(mis, rb, start, false);
|
||||
}
|
||||
|
||||
static bool migration_colo_enabled;
|
||||
@@ -1060,6 +1083,8 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s)
|
||||
info->ram->dirty_sync_count = ram_counters.dirty_sync_count;
|
||||
info->ram->dirty_sync_missed_zero_copy =
|
||||
ram_counters.dirty_sync_missed_zero_copy;
|
||||
info->ram->cgs_epochs = ram_counters.cgs_epochs;
|
||||
info->ram->cgs_private_pages = ram_counters.cgs_private_pages;
|
||||
info->ram->postcopy_requests = ram_counters.postcopy_requests;
|
||||
info->ram->page_size = page_size;
|
||||
info->ram->multifd_bytes = ram_counters.multifd_bytes;
|
||||
@@ -2317,6 +2342,11 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc,
|
||||
{
|
||||
Error *local_err = NULL;
|
||||
|
||||
if (!cgs_mig_is_ready()) {
|
||||
error_setg(errp, "cgs mig required, but not ready");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (resume) {
|
||||
if (s->state != MIGRATION_STATUS_POSTCOPY_PAUSED) {
|
||||
error_setg(errp, "Cannot resume if there is no "
|
||||
@@ -2771,6 +2801,8 @@ static struct rp_cmd_args {
|
||||
[MIG_RP_MSG_REQ_PAGES_ID] = { .len = -1, .name = "REQ_PAGES_ID" },
|
||||
[MIG_RP_MSG_RECV_BITMAP] = { .len = -1, .name = "RECV_BITMAP" },
|
||||
[MIG_RP_MSG_RESUME_ACK] = { .len = 4, .name = "RESUME_ACK" },
|
||||
[MIG_RP_MSG_REQ_PRIVATE_PAGES] = { .len = 12, .name = "REQ_PRIVATE_PAGES" },
|
||||
[MIG_RP_MSG_REQ_PRIVATE_PAGES_ID] = { .len = -1, .name = "REQ_PRIVATE_PAGES_ID" },
|
||||
[MIG_RP_MSG_MAX] = { .len = -1, .name = "MAX" },
|
||||
};
|
||||
|
||||
@@ -2780,7 +2812,7 @@ static struct rp_cmd_args {
|
||||
* and we don't need to send pages that have already been sent.
|
||||
*/
|
||||
static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname,
|
||||
ram_addr_t start, size_t len)
|
||||
ram_addr_t start, size_t len, bool is_private)
|
||||
{
|
||||
long our_host_ps = qemu_real_host_page_size();
|
||||
|
||||
@@ -2798,7 +2830,7 @@ static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname,
|
||||
return;
|
||||
}
|
||||
|
||||
if (ram_save_queue_pages(rbname, start, len)) {
|
||||
if (ram_save_queue_pages(rbname, start, len, is_private)) {
|
||||
mark_source_rp_bad(ms);
|
||||
}
|
||||
}
|
||||
@@ -2878,6 +2910,7 @@ static void *source_return_path_thread(void *opaque)
|
||||
uint32_t tmp32, sibling_error;
|
||||
ram_addr_t start = 0; /* =0 to silence warning */
|
||||
size_t len = 0, expected_len;
|
||||
bool is_private;
|
||||
int res;
|
||||
|
||||
trace_source_return_path_thread_entry();
|
||||
@@ -2924,6 +2957,7 @@ retry:
|
||||
goto out;
|
||||
}
|
||||
|
||||
is_private = false;
|
||||
/* OK, we have the message and the data */
|
||||
switch (header_type) {
|
||||
case MIG_RP_MSG_SHUT:
|
||||
@@ -2945,12 +2979,17 @@ retry:
|
||||
trace_source_return_path_thread_pong(tmp32);
|
||||
break;
|
||||
|
||||
case MIG_RP_MSG_REQ_PRIVATE_PAGES:
|
||||
is_private = true;
|
||||
QEMU_FALLTHROUGH;
|
||||
case MIG_RP_MSG_REQ_PAGES:
|
||||
start = ldq_be_p(buf);
|
||||
len = ldl_be_p(buf + 8);
|
||||
migrate_handle_rp_req_pages(ms, NULL, start, len);
|
||||
migrate_handle_rp_req_pages(ms, NULL, start, len, is_private);
|
||||
break;
|
||||
|
||||
case MIG_RP_MSG_REQ_PRIVATE_PAGES_ID:
|
||||
is_private = true;
|
||||
QEMU_FALLTHROUGH;
|
||||
case MIG_RP_MSG_REQ_PAGES_ID:
|
||||
expected_len = 12 + 1; /* header + termination */
|
||||
|
||||
@@ -2968,7 +3007,7 @@ retry:
|
||||
mark_source_rp_bad(ms);
|
||||
goto out;
|
||||
}
|
||||
migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len);
|
||||
migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len, is_private);
|
||||
break;
|
||||
|
||||
case MIG_RP_MSG_RECV_BITMAP:
|
||||
@@ -3111,8 +3150,7 @@ static int postcopy_start(MigrationState *ms)
|
||||
goto fail;
|
||||
}
|
||||
|
||||
ret = migration_maybe_pause(ms, &cur_state,
|
||||
MIGRATION_STATUS_POSTCOPY_ACTIVE);
|
||||
ret = migration_pause(ms, &cur_state, MIGRATION_STATUS_POSTCOPY_ACTIVE);
|
||||
if (ret < 0) {
|
||||
goto fail;
|
||||
}
|
||||
@@ -3123,11 +3161,16 @@ static int postcopy_start(MigrationState *ms)
|
||||
}
|
||||
restart_block = true;
|
||||
|
||||
ret = qemu_savevm_state_prepare_postcopy(ms->to_dst_file);
|
||||
if (ret < 0) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/*
|
||||
* Cause any non-postcopiable, but iterative devices to
|
||||
* send out their final data.
|
||||
*/
|
||||
qemu_savevm_state_complete_precopy(ms->to_dst_file, true, false);
|
||||
qemu_savevm_state_complete_precopy(ms->to_dst_file, true, false, true);
|
||||
|
||||
/*
|
||||
* in Finish migrate and with the io-lock held everything should
|
||||
@@ -3177,7 +3220,7 @@ static int postcopy_start(MigrationState *ms)
|
||||
*/
|
||||
qemu_savevm_send_postcopy_listen(fb);
|
||||
|
||||
qemu_savevm_state_complete_precopy(fb, false, false);
|
||||
qemu_savevm_state_complete_precopy(fb, false, false, false);
|
||||
if (migrate_postcopy_ram()) {
|
||||
qemu_savevm_send_ping(fb, 3);
|
||||
}
|
||||
@@ -3301,6 +3344,19 @@ static int migration_maybe_pause(MigrationState *s,
|
||||
return s->state == new_state ? 0 : -EINVAL;
|
||||
}
|
||||
|
||||
static int migration_pause(MigrationState *s, int *current_active_state,
|
||||
int new_state)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = migration_maybe_pause(s, current_active_state, new_state);
|
||||
if (ret < 0) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
return cgs_mig_savevm_state_pause(s->to_dst_file);
|
||||
}
|
||||
|
||||
/**
|
||||
* migration_completion: Used by migration_thread when there's not much left.
|
||||
* The caller 'breaks' the loop when this returns.
|
||||
@@ -3324,14 +3380,16 @@ static void migration_completion(MigrationState *s)
|
||||
ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
|
||||
trace_migration_completion_vm_stop(ret);
|
||||
if (ret >= 0) {
|
||||
ret = migration_maybe_pause(s, ¤t_active_state,
|
||||
MIGRATION_STATUS_DEVICE);
|
||||
ret = migration_pause(s, ¤t_active_state,
|
||||
MIGRATION_STATUS_DEVICE);
|
||||
}
|
||||
|
||||
if (ret >= 0) {
|
||||
qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);
|
||||
ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
|
||||
inactivate);
|
||||
inactivate, true);
|
||||
}
|
||||
|
||||
if (inactivate && ret >= 0) {
|
||||
s->block_inactive = true;
|
||||
}
|
||||
@@ -3805,6 +3863,7 @@ static void migration_iteration_finish(MigrationState *s)
|
||||
case MIGRATION_STATUS_FAILED:
|
||||
case MIGRATION_STATUS_CANCELLED:
|
||||
case MIGRATION_STATUS_CANCELLING:
|
||||
ram_save_cancel();
|
||||
if (s->vm_was_running) {
|
||||
if (!runstate_check(RUN_STATE_SHUTDOWN)) {
|
||||
vm_start();
|
||||
|
@@ -64,6 +64,15 @@ typedef struct {
|
||||
bool all_zero;
|
||||
} PostcopyTmpPage;
|
||||
|
||||
typedef struct CgsPrivateFaultReq {
|
||||
RAMBlock *rb;
|
||||
ram_addr_t offset;
|
||||
hwaddr gpa;
|
||||
QemuSemaphore sem;
|
||||
|
||||
QSIMPLEQ_ENTRY(CgsPrivateFaultReq) next_req;
|
||||
} CgsPrivateFaultReq;
|
||||
|
||||
/* State for the incoming migration */
|
||||
struct MigrationIncomingState {
|
||||
QEMUFile *from_src_file;
|
||||
@@ -186,6 +195,20 @@ struct MigrationIncomingState {
|
||||
* contains valid information.
|
||||
*/
|
||||
QemuMutex page_request_mutex;
|
||||
|
||||
/*
|
||||
* One entry per vCPU thread, and indexed by cpu_index.
|
||||
* Support 128 CPUs first.
|
||||
* TODO: remove hardcoding 128.
|
||||
*/
|
||||
CgsPrivateFaultReq private_fault_req[128];
|
||||
QemuSpin req_pending_list_lock;
|
||||
/* List added by vCPU threads and removed by the fault thread */
|
||||
QSIMPLEQ_HEAD(, CgsPrivateFaultReq) private_fault_req_pending_list;
|
||||
|
||||
QemuSpin req_sent_list_lock;
|
||||
/* List added by the fault thread and removed by the ram listen thread */
|
||||
QSIMPLEQ_HEAD(, CgsPrivateFaultReq) private_fault_req_sent_list;
|
||||
};
|
||||
|
||||
MigrationIncomingState *migration_incoming_get_current(void);
|
||||
@@ -451,7 +474,8 @@ void migrate_send_rp_pong(MigrationIncomingState *mis,
|
||||
int migrate_send_rp_req_pages(MigrationIncomingState *mis, RAMBlock *rb,
|
||||
ram_addr_t start, uint64_t haddr);
|
||||
int migrate_send_rp_message_req_pages(MigrationIncomingState *mis,
|
||||
RAMBlock *rb, ram_addr_t start);
|
||||
RAMBlock *rb, ram_addr_t start,
|
||||
bool is_private);
|
||||
void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis,
|
||||
char *block_name);
|
||||
void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value);
|
||||
@@ -480,4 +504,9 @@ void postcopy_temp_page_reset(PostcopyTmpPage *tmp_page);
|
||||
bool migrate_multi_channels_is_allowed(void);
|
||||
void migrate_protocol_allow_multi_channels(bool allow);
|
||||
|
||||
void postcopy_add_private_fault_to_pending_list(RAMBlock *rb,
|
||||
ram_addr_t offset,
|
||||
hwaddr gpa,
|
||||
int cpu_index);
|
||||
|
||||
#endif
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user