388 lines
12 KiB
Diff
388 lines
12 KiB
Diff
|
# HG changeset patch
|
||
|
# Parent 427c10f8e1e28d942886f89ebc79ffa93cb7fce9
|
||
|
xenpaging: use wait queues
|
||
|
|
||
|
Use a wait queue to put a guest vcpu to sleep while the requested gfn is
|
||
|
in paging state. This adds missing p2m_mem_paging_populate() calls to
|
||
|
some callers of the new get_gfn* variants, which would crash now
|
||
|
because they get an invalid mfn. It also fixes guest crashes due to
|
||
|
unexpected returns from do_memory_op because copy_to/from_guest ran into
|
||
|
a paged gfn. Now those places will always get a valid mfn.
|
||
|
|
||
|
Since each gfn could be requested by several guest vcpus at the same
|
||
|
time, a queue of paged gfns is maintained. Each vcpu will be attached to
|
||
|
that queue. Once p2m_mem_paging_resume has restored the gfn, the waiting
|
||
|
vcpus will resume execution.
|
||
|
|
||
|
There is untested code in p2m_mem_paging_init_queue() to allow cpu
|
||
|
hotplug. Since each vcpu may wait on a different gfn, there have to be as
|
||
|
many queues as vcpus. But xl vcpu-set does not seem to work right now,
|
||
|
so this code path can't be exercised right now.
|
||
|
|
||
|
TODO:
|
||
|
- use hash in p2m_mem_paging_queue_head
|
||
|
- rename gfn_lock
|
||
|
- use mm_lock_t for gfn_lock
|
||
|
|
||
|
Signed-off-by: Olaf Hering <olaf@aepfle.de>
|
||
|
|
||
|
---
|
||
|
xen/arch/x86/hvm/hvm.c | 2
|
||
|
xen/arch/x86/mm/p2m.c | 220 +++++++++++++++++++++++++++++++++------
|
||
|
xen/common/domctl.c | 3
|
||
|
xen/include/asm-x86/hvm/domain.h | 3
|
||
|
xen/include/asm-x86/p2m.h | 7 +
|
||
|
5 files changed, 205 insertions(+), 30 deletions(-)
|
||
|
|
||
|
--- a/xen/arch/x86/hvm/hvm.c
|
||
|
+++ b/xen/arch/x86/hvm/hvm.c
|
||
|
@@ -442,6 +442,8 @@ int hvm_domain_initialise(struct domain
|
||
|
spin_lock_init(&d->arch.hvm_domain.irq_lock);
|
||
|
spin_lock_init(&d->arch.hvm_domain.uc_lock);
|
||
|
|
||
|
+ spin_lock_init(&d->arch.hvm_domain.gfn_lock);
|
||
|
+
|
||
|
INIT_LIST_HEAD(&d->arch.hvm_domain.msixtbl_list);
|
||
|
spin_lock_init(&d->arch.hvm_domain.msixtbl_list_lock);
|
||
|
|
||
|
--- a/xen/arch/x86/mm/p2m.c
|
||
|
+++ b/xen/arch/x86/mm/p2m.c
|
||
|
@@ -30,6 +30,7 @@
|
||
|
#include <asm/p2m.h>
|
||
|
#include <asm/hvm/vmx/vmx.h> /* ept_p2m_init() */
|
||
|
#include <xen/iommu.h>
|
||
|
+#include <xen/wait.h>
|
||
|
#include <asm/mem_event.h>
|
||
|
#include <public/mem_event.h>
|
||
|
#include <asm/mem_sharing.h>
|
||
|
@@ -2839,6 +2840,182 @@ set_shared_p2m_entry(struct p2m_domain *
|
||
|
}
|
||
|
|
||
|
#ifdef __x86_64__
|
||
|
+struct p2m_mem_paging_queue {
|
||
|
+ struct list_head list;
|
||
|
+ struct waitqueue_head wq;
|
||
|
+ unsigned long gfn;
|
||
|
+ unsigned short waiters;
|
||
|
+ unsigned short woken;
|
||
|
+ unsigned short index;
|
||
|
+};
|
||
|
+
|
||
|
+struct p2m_mem_paging_queue_head {
|
||
|
+ struct list_head list;
|
||
|
+ unsigned int max;
|
||
|
+};
|
||
|
+
|
||
|
+int p2m_mem_paging_init_queue(struct domain *d, unsigned int max)
|
||
|
+{
|
||
|
+ struct p2m_mem_paging_queue_head *h;
|
||
|
+ struct p2m_mem_paging_queue *q;
|
||
|
+ unsigned int i, nr;
|
||
|
+ int ret = 0;
|
||
|
+
|
||
|
+ if (!is_hvm_domain(d))
|
||
|
+ return 0;
|
||
|
+
|
||
|
+ spin_lock(&d->arch.hvm_domain.gfn_lock);
|
||
|
+
|
||
|
+ if (!d->arch.hvm_domain.gfn_queue) {
|
||
|
+ ret = -ENOMEM;
|
||
|
+ h = xzalloc(struct p2m_mem_paging_queue_head);
|
||
|
+ if (!h) {
|
||
|
+ domain_crash(d);
|
||
|
+ goto out;
|
||
|
+ }
|
||
|
+
|
||
|
+ INIT_LIST_HEAD(&h->list);
|
||
|
+ nr = max;
|
||
|
+ } else {
|
||
|
+ h = d->arch.hvm_domain.gfn_queue;
|
||
|
+ if (max <= h->max)
|
||
|
+ goto out;
|
||
|
+ nr = max - h->max;
|
||
|
+ }
|
||
|
+
|
||
|
+ ret = -ENOMEM;
|
||
|
+ q = xzalloc_array(struct p2m_mem_paging_queue, nr);
|
||
|
+ if (!q) {
|
||
|
+ if (!d->arch.hvm_domain.gfn_queue)
|
||
|
+ xfree(h);
|
||
|
+ domain_crash(d);
|
||
|
+ goto out;
|
||
|
+ }
|
||
|
+
|
||
|
+ for (i = 0; i < nr; i++) {
|
||
|
+ init_waitqueue_head(&q[i].wq);
|
||
|
+ INIT_LIST_HEAD(&q[i].list);
|
||
|
+ q[i].index = h->max + i + 1;
|
||
|
+ list_add_tail(&q[i].list, &h->list);
|
||
|
+ }
|
||
|
+
|
||
|
+ h->max = max;
|
||
|
+ d->arch.hvm_domain.gfn_queue = h;
|
||
|
+ ret = 0;
|
||
|
+
|
||
|
+out:
|
||
|
+ spin_unlock(&d->arch.hvm_domain.gfn_lock);
|
||
|
+ return ret;
|
||
|
+}
|
||
|
+
|
||
|
+static struct p2m_mem_paging_queue *p2m_mem_paging_get_queue(struct domain *d, unsigned long gfn)
|
||
|
+{
|
||
|
+ struct p2m_mem_paging_queue_head *h;
|
||
|
+ struct p2m_mem_paging_queue *q, *q_match, *q_free;
|
||
|
+
|
||
|
+ h = d->arch.hvm_domain.gfn_queue;
|
||
|
+ q_match = q_free = NULL;
|
||
|
+
|
||
|
+ spin_lock(&d->arch.hvm_domain.gfn_lock);
|
||
|
+
|
||
|
+ list_for_each_entry(q, &h->list, list) {
|
||
|
+ if (q->gfn == gfn) {
|
||
|
+ q_match = q;
|
||
|
+ break;
|
||
|
+ }
|
||
|
+ if (!q_free && !q->waiters)
|
||
|
+ q_free = q;
|
||
|
+ }
|
||
|
+
|
||
|
+ if (!q_match && q_free)
|
||
|
+ q_match = q_free;
|
||
|
+
|
||
|
+ if (q_match) {
|
||
|
+ if (q_match->woken)
|
||
|
+ printk("wq woken for gfn %u:%u %lx %u %u %u\n", current->domain->domain_id, current->vcpu_id, gfn, q_match->index, q_match->woken, q_match->waiters);
|
||
|
+ q_match->waiters++;
|
||
|
+ q_match->gfn = gfn;
|
||
|
+ }
|
||
|
+
|
||
|
+ if (!q_match)
|
||
|
+ printk("No wq_get for gfn %u:%u %lx\n", current->domain->domain_id, current->vcpu_id, gfn);
|
||
|
+
|
||
|
+ spin_unlock(&d->arch.hvm_domain.gfn_lock);
|
||
|
+ return q_match;
|
||
|
+}
|
||
|
+
|
||
|
+static void p2m_mem_paging_put_queue(struct domain *d, struct p2m_mem_paging_queue *q_match)
|
||
|
+{
|
||
|
+ spin_lock(&d->arch.hvm_domain.gfn_lock);
|
||
|
+
|
||
|
+ if (q_match->waiters == 0)
|
||
|
+ printk("wq_put no waiters, gfn %u:%u %lx %u\n", current->domain->domain_id, current->vcpu_id, q_match->gfn, q_match->woken);
|
||
|
+ else if (--q_match->waiters == 0)
|
||
|
+ q_match->gfn = q_match->woken = 0;;
|
||
|
+
|
||
|
+ spin_unlock(&d->arch.hvm_domain.gfn_lock);
|
||
|
+}
|
||
|
+
|
||
|
+static void p2m_mem_paging_wake_queue(struct domain *d, unsigned long gfn)
|
||
|
+{
|
||
|
+ struct p2m_mem_paging_queue_head *h;
|
||
|
+ struct p2m_mem_paging_queue *q, *q_match = NULL;
|
||
|
+
|
||
|
+ spin_lock(&d->arch.hvm_domain.gfn_lock);
|
||
|
+
|
||
|
+ h = d->arch.hvm_domain.gfn_queue;
|
||
|
+ list_for_each_entry(q, &h->list, list) {
|
||
|
+ if (q->gfn == gfn) {
|
||
|
+ q_match = q;
|
||
|
+ break;
|
||
|
+ }
|
||
|
+ }
|
||
|
+ if (q_match) {
|
||
|
+ if (q_match->woken || q_match->waiters == 0)
|
||
|
+ printk("Wrong wake for gfn %u:%u %p %lx %u %u\n", current->domain->domain_id, current->vcpu_id, q_match, gfn, q_match->woken, q_match->waiters);
|
||
|
+ q_match->woken++;
|
||
|
+ wake_up_all(&q_match->wq);
|
||
|
+ }
|
||
|
+ spin_unlock(&d->arch.hvm_domain.gfn_lock);
|
||
|
+}
|
||
|
+
|
||
|
+/* Returns 0 if the gfn is still paged */
|
||
|
+static int p2m_mem_paging_get_entry(mfn_t *mfn,
|
||
|
+ struct p2m_domain *p2m, unsigned long gfn,
|
||
|
+ p2m_type_t *t, p2m_query_t q)
|
||
|
+{
|
||
|
+ p2m_access_t a = 0;
|
||
|
+ *mfn = p2m->get_entry(p2m, gfn, t, &a, q);
|
||
|
+
|
||
|
+ return p2m_is_paging(*t) ? 0 : 1;
|
||
|
+}
|
||
|
+
|
||
|
+/* Go to sleep in case of guest access */
|
||
|
+void p2m_mem_paging_wait(mfn_t *mfn,
|
||
|
+ struct p2m_domain *p2m, unsigned long gfn,
|
||
|
+ p2m_type_t *t, p2m_query_t q)
|
||
|
+{
|
||
|
+ struct p2m_mem_paging_queue *pmpq;
|
||
|
+
|
||
|
+ /* Return p2mt as is in case of query */
|
||
|
+ if ( q == p2m_query )
|
||
|
+ return;
|
||
|
+ /* Foreign domains can not go to sleep */
|
||
|
+ if ( current->domain != p2m->domain )
|
||
|
+ return;
|
||
|
+
|
||
|
+ pmpq = p2m_mem_paging_get_queue(p2m->domain, gfn);
|
||
|
+ if ( !pmpq )
|
||
|
+ return;
|
||
|
+
|
||
|
+ /* Populate the page once */
|
||
|
+ if ( *t == p2m_ram_paging_out || *t == p2m_ram_paged )
|
||
|
+ p2m_mem_paging_populate(p2m, gfn);
|
||
|
+
|
||
|
+ wait_event(pmpq->wq, p2m_mem_paging_get_entry(mfn, p2m, gfn, t, q));
|
||
|
+ p2m_mem_paging_put_queue(p2m->domain, pmpq);
|
||
|
+}
|
||
|
+
|
||
|
/**
|
||
|
* p2m_mem_paging_nominate - Mark a guest page as to-be-paged-out
|
||
|
* @d: guest domain
|
||
|
@@ -3020,21 +3197,17 @@ void p2m_mem_paging_drop_page(struct p2m
|
||
|
*/
|
||
|
void p2m_mem_paging_populate(struct p2m_domain *p2m, unsigned long gfn)
|
||
|
{
|
||
|
- struct vcpu *v = current;
|
||
|
- mem_event_request_t req;
|
||
|
+ mem_event_request_t req = { .type = MEM_EVENT_TYPE_PAGING, .gfn = gfn };
|
||
|
p2m_type_t p2mt;
|
||
|
p2m_access_t a;
|
||
|
mfn_t mfn;
|
||
|
- int restored = 0;
|
||
|
struct domain *d = p2m->domain;
|
||
|
+ int put_request = 0;
|
||
|
|
||
|
/* Check that there's space on the ring for this request */
|
||
|
if ( mem_event_claim_slot(d, &d->mem_event->paging) )
|
||
|
return;
|
||
|
|
||
|
- memset(&req, 0, sizeof(req));
|
||
|
- req.type = MEM_EVENT_TYPE_PAGING;
|
||
|
-
|
||
|
/* Fix p2m mapping */
|
||
|
p2m_lock(p2m);
|
||
|
mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, p2m_query);
|
||
|
@@ -3043,35 +3216,23 @@ void p2m_mem_paging_populate(struct p2m_
|
||
|
{
|
||
|
/* Restore page state if gfn was requested before evict */
|
||
|
if ( p2mt == p2m_ram_paging_out && mfn_valid(mfn) ) {
|
||
|
+ /* Restore gfn because it is needed by guest before evict */
|
||
|
set_p2m_entry(p2m, gfn, mfn, 0, p2m_ram_rw, a);
|
||
|
- restored = 1;
|
||
|
} else {
|
||
|
set_p2m_entry(p2m, gfn, mfn, 0, p2m_ram_paging_in_start, a);
|
||
|
+ put_request = 1;
|
||
|
}
|
||
|
+ /* Evict will fail now, the pager has to try another gfn */
|
||
|
+
|
||
|
audit_p2m(p2m, 1);
|
||
|
}
|
||
|
p2m_unlock(p2m);
|
||
|
|
||
|
- /* Pause domain if request came from guest and gfn has paging type */
|
||
|
- if ( !restored && p2m_is_paging(p2mt) && v->domain == d )
|
||
|
- {
|
||
|
- vcpu_pause_nosync(v);
|
||
|
- req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
|
||
|
- }
|
||
|
- /* No need to inform pager if the gfn is not in the page-out path */
|
||
|
- else if ( restored || !p2m_do_populate(p2mt) )
|
||
|
- {
|
||
|
- /* gfn is already on its way back and vcpu is not paused */
|
||
|
+ /* One request per gfn, guest vcpus go to sleep, foreigners try again */
|
||
|
+ if ( put_request )
|
||
|
+ mem_event_put_request(d, &d->mem_event->paging, &req);
|
||
|
+ else
|
||
|
mem_event_release_slot(d, &d->mem_event->paging);
|
||
|
- return;
|
||
|
- }
|
||
|
-
|
||
|
- /* Send request to pager */
|
||
|
- req.gfn = gfn;
|
||
|
- req.p2mt = p2mt;
|
||
|
- req.vcpu_id = v->vcpu_id;
|
||
|
-
|
||
|
- mem_event_put_request(d, &d->mem_event->paging, &req);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
@@ -3197,12 +3358,11 @@ void p2m_mem_paging_resume(struct p2m_do
|
||
|
p2m_unlock(p2m);
|
||
|
}
|
||
|
|
||
|
- /* Unpause domain */
|
||
|
- if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
|
||
|
- vcpu_unpause(d->vcpu[rsp.vcpu_id]);
|
||
|
-
|
||
|
/* Wake vcpus waiting for room in the ring */
|
||
|
mem_event_wake_requesters(&d->mem_event->paging);
|
||
|
+
|
||
|
+ /* Wake all vcpus sleeping on the wait queue for this gfn */
|
||
|
+ p2m_mem_paging_wake_queue(d, rsp.gfn);
|
||
|
}
|
||
|
|
||
|
void p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, unsigned long gla,
|
||
|
--- a/xen/common/domctl.c
|
||
|
+++ b/xen/common/domctl.c
|
||
|
@@ -536,6 +536,9 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
|
||
|
goto maxvcpu_out;
|
||
|
}
|
||
|
|
||
|
+ if ( p2m_mem_paging_init_queue(d, max) )
|
||
|
+ goto maxvcpu_out;
|
||
|
+
|
||
|
ret = 0;
|
||
|
|
||
|
maxvcpu_out:
|
||
|
--- a/xen/include/asm-x86/hvm/domain.h
|
||
|
+++ b/xen/include/asm-x86/hvm/domain.h
|
||
|
@@ -87,6 +87,9 @@ struct hvm_domain {
|
||
|
|
||
|
struct viridian_domain viridian;
|
||
|
|
||
|
+ spinlock_t gfn_lock;
|
||
|
+ struct p2m_mem_paging_queue_head *gfn_queue;
|
||
|
+
|
||
|
bool_t hap_enabled;
|
||
|
bool_t mem_sharing_enabled;
|
||
|
bool_t qemu_mapcache_invalidate;
|
||
|
--- a/xen/include/asm-x86/p2m.h
|
||
|
+++ b/xen/include/asm-x86/p2m.h
|
||
|
@@ -343,6 +343,8 @@ gfn_to_mfn_type_p2m(struct p2m_domain *p
|
||
|
}
|
||
|
|
||
|
|
||
|
+extern void p2m_mem_paging_wait(mfn_t *mfn, struct p2m_domain *p2m, unsigned long gfn, p2m_type_t *t, p2m_query_t q);
|
||
|
+
|
||
|
/* General conversion function from gfn to mfn */
|
||
|
static inline mfn_t _gfn_to_mfn_type(struct p2m_domain *p2m,
|
||
|
unsigned long gfn, p2m_type_t *t,
|
||
|
@@ -364,6 +366,9 @@ static inline mfn_t _gfn_to_mfn_type(str
|
||
|
mfn = gfn_to_mfn_type_p2m(p2m, gfn, t, q);
|
||
|
|
||
|
#ifdef __x86_64__
|
||
|
+ if (unlikely(p2m_is_paging(*t)) )
|
||
|
+ p2m_mem_paging_wait(&mfn, p2m, gfn, t, q);
|
||
|
+
|
||
|
if (unlikely((p2m_is_broken(*t))))
|
||
|
{
|
||
|
/* Return invalid_mfn to avoid caller's access */
|
||
|
@@ -520,6 +525,8 @@ int clear_mmio_p2m_entry(struct p2m_doma
|
||
|
/* Modify p2m table for shared gfn */
|
||
|
int set_shared_p2m_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn);
|
||
|
|
||
|
+/* Allocate (or grow to max entries) the domain's pool of paging wait queues */
|
||
|
+int p2m_mem_paging_init_queue(struct domain *d, unsigned int max);
|
||
|
/* Check if a nominated gfn is valid to be paged out */
|
||
|
int p2m_mem_paging_nominate(struct p2m_domain *p2m, unsigned long gfn);
|
||
|
/* Evict a frame */
|
||
|
@@ -533,6 +540,8 @@ int p2m_mem_paging_prep(struct p2m_domai
|
||
|
/* Resume normal operation (in case a domain was paused) */
|
||
|
void p2m_mem_paging_resume(struct p2m_domain *p2m);
|
||
|
#else
|
||
|
+static inline int p2m_mem_paging_init_queue(struct domain *d, unsigned int max)
|
||
|
+{ return 0; }
|
||
|
static inline void p2m_mem_paging_drop_page(struct p2m_domain *p2m, unsigned long gfn)
|
||
|
{ }
|
||
|
static inline void p2m_mem_paging_populate(struct p2m_domain *p2m, unsigned long gfn)
|