xen/xenpaging.mem_event-use-wait_queue.patch

560 lines
18 KiB
Diff
Raw Normal View History

# HG changeset patch
# Parent aa97fafb53fc95aaec8d9890635d14304f24c362
mem_event: use wait queue when ring is full
This change is based on an idea/patch from Adin Scannell.
If the ring is full, put the current vcpu to sleep if it belongs to the
target domain. The wakeup happens in the p2m_*_resume functions. Wakeup
will take the number of free slots into account.
A request from a foreign domain has to succeed once a slot has been claimed,
because such vcpus cannot sleep.
This change also fixes a bug in p2m_mem_paging_drop_page(). Up to now a
full ring led to a harmless inconsistency in the pager.
v6:
- take foreign requests into account before calling wake_up_nr()
- call wake_up_nr() outside of ring lock
- rename ->bit to ->pause_flag
v5:
- rename mem_event_check_ring() to mem_event_claim_slot()
- rename mem_event_put_req_producers() to mem_event_release_slot()
- add local/foreign request accounting
- keep room for at least one guest request
v4:
- fix off-by-one bug in _mem_event_put_request
- add mem_event_wake_requesters() and use wake_up_nr()
- rename mem_event_unpause_vcpus() and mem_event_mark_and_pause() functions
- req_producers counts foreign request producers, rename member
v3:
- rename ->mem_event_bit to ->bit
- remove me_ from new VPF_ defines
v2:
- p2m_mem_paging_populate: move vcpu_pause after put_request, otherwise the
vcpu will not wake_up after a wait_event because the pause_count was
increased twice. Fixes guest hangs.
- update free space check in _mem_event_put_request()
- simplify mem_event_put_request()
Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
xen/arch/x86/hvm/hvm.c | 4 -
xen/arch/x86/mm/mem_event.c | 147 ++++++++++++++++++++++++++++++++++------
xen/arch/x86/mm/mem_sharing.c | 46 ++++--------
xen/arch/x86/mm/p2m.c | 36 ++++-----
xen/include/asm-x86/mem_event.h | 10 +-
xen/include/xen/sched.h | 17 +++-
6 files changed, 179 insertions(+), 81 deletions(-)
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -3915,8 +3915,8 @@ static int hvm_memory_event_traps(long p
if ( (p & HVMPME_onchangeonly) && (value == old) )
return 1;
- rc = mem_event_check_ring(d, &d->mem_event->access);
- if ( rc )
+ rc = mem_event_claim_slot(d, &d->mem_event->access);
+ if ( rc < 0 )
return rc;
memset(&req, 0, sizeof(req));
--- a/xen/arch/x86/mm/mem_event.c
+++ b/xen/arch/x86/mm/mem_event.c
@@ -23,6 +23,7 @@
#include <asm/domain.h>
#include <xen/event.h>
+#include <xen/wait.h>
#include <asm/p2m.h>
#include <asm/mem_event.h>
#include <asm/mem_paging.h>
@@ -39,6 +40,7 @@
static int mem_event_enable(struct domain *d,
xen_domctl_mem_event_op_t *mec,
+ int pause_flag,
struct mem_event_domain *med)
{
int rc;
@@ -94,8 +96,12 @@ static int mem_event_enable(struct domai
mem_event_ring_lock_init(med);
+ med->pause_flag = pause_flag;
+
+ init_waitqueue_head(&med->wq);
+
/* Wake any VCPUs paused for memory events */
- mem_event_unpause_vcpus(d);
+ mem_event_wake_waiters(d, med);
return 0;
@@ -111,6 +117,9 @@ static int mem_event_enable(struct domai
static int mem_event_disable(struct mem_event_domain *med)
{
+ if (!list_empty(&med->wq.list))
+ return -EBUSY;
+
unmap_domain_page(med->ring_page);
med->ring_page = NULL;
@@ -120,13 +129,24 @@ static int mem_event_disable(struct mem_
return 0;
}
-void mem_event_put_request(struct domain *d, struct mem_event_domain *med, mem_event_request_t *req)
+static int _mem_event_put_request(struct domain *d,
+ struct mem_event_domain *med,
+ mem_event_request_t *req)
{
mem_event_front_ring_t *front_ring;
+ int free_req, claimed_req;
RING_IDX req_prod;
mem_event_ring_lock(med);
+ free_req = RING_FREE_REQUESTS(&med->front_ring);
+ /* Foreign requests must succeed because their vcpus can not sleep */
+ claimed_req = med->foreign_producers;
+ if ( !free_req || ( current->domain == d && free_req <= claimed_req ) ) {
+ mem_event_ring_unlock(med);
+ return 0;
+ }
+
front_ring = &med->front_ring;
req_prod = front_ring->req_prod_pvt;
@@ -134,14 +154,35 @@ void mem_event_put_request(struct domain
memcpy(RING_GET_REQUEST(front_ring, req_prod), req, sizeof(*req));
req_prod++;
+ /* Update accounting */
+ if ( current->domain == d )
+ med->target_producers--;
+ else
+ med->foreign_producers--;
+
/* Update ring */
- med->req_producers--;
front_ring->req_prod_pvt = req_prod;
RING_PUSH_REQUESTS(front_ring);
mem_event_ring_unlock(med);
notify_via_xen_event_channel(d, med->xen_port);
+
+ return 1;
+}
+
+void mem_event_put_request(struct domain *d, struct mem_event_domain *med,
+ mem_event_request_t *req)
+{
+ /* Go to sleep if request came from guest */
+ if (current->domain == d) {
+ wait_event(med->wq, _mem_event_put_request(d, med, req));
+ return;
+ }
+ /* Ring was full anyway, unable to sleep in non-guest context */
+ if (!_mem_event_put_request(d, med, req))
+ printk("Failed to put memreq: d %u t %x f %x gfn %lx\n", d->domain_id,
+ req->type, req->flags, (unsigned long)req->gfn);
}
void mem_event_get_response(struct mem_event_domain *med, mem_event_response_t *rsp)
@@ -165,32 +206,97 @@ void mem_event_get_response(struct mem_e
mem_event_ring_unlock(med);
}
-void mem_event_unpause_vcpus(struct domain *d)
+/**
+ * mem_event_wake_requesters - Wake vcpus waiting for room in the ring
+ * @med: mem_event ring; slots claimed by foreign producers are
+ *       excluded from the number of vcpus to wake
+ *
+ * mem_event_wake_requesters() will wakeup vcpus waiting for room in the
+ * ring. Only as many as can place another request in the ring will
+ * resume execution.
+ */
+void mem_event_wake_requesters(struct mem_event_domain *med)
+{
+ int free_req;
+
+ mem_event_ring_lock(med);
+ free_req = RING_FREE_REQUESTS(&med->front_ring);
+ free_req -= med->foreign_producers;
+ mem_event_ring_unlock(med);
+
+ if ( free_req )
+ wake_up_nr(&med->wq, free_req);
+}
+
+/**
+ * mem_event_wake_waiters - Wake all vcpus waiting for the ring
+ * @d: guest domain
+ * @med: mem_event ring
+ *
+ * mem_event_wake_waiters() will wakeup all vcpus waiting for the ring to
+ * become available.
+ */
+void mem_event_wake_waiters(struct domain *d, struct mem_event_domain *med)
{
struct vcpu *v;
for_each_vcpu ( d, v )
- if ( test_and_clear_bit(_VPF_mem_event, &v->pause_flags) )
+ if ( test_and_clear_bit(med->pause_flag, &v->pause_flags) )
vcpu_wake(v);
}
-void mem_event_mark_and_pause(struct vcpu *v)
+/**
+ * mem_event_mark_and_sleep - Put vcpu to sleep
+ * @v: guest vcpu
+ * @med: mem_event ring
+ *
+ * mem_event_mark_and_sleep() tags the vcpu and puts it to sleep.
+ * The vcpu will resume execution in mem_event_wake_waiters().
+ */
+void mem_event_mark_and_sleep(struct vcpu *v, struct mem_event_domain *med)
{
- set_bit(_VPF_mem_event, &v->pause_flags);
+ set_bit(med->pause_flag, &v->pause_flags);
vcpu_sleep_nosync(v);
}
-void mem_event_put_req_producers(struct mem_event_domain *med)
+/**
+ * mem_event_release_slot - Release a claimed slot
+ * @med: mem_event ring
+ *
+ * mem_event_release_slot() releases a claimed slot in the mem_event ring.
+ */
+void mem_event_release_slot(struct domain *d, struct mem_event_domain *med)
{
mem_event_ring_lock(med);
- med->req_producers--;
+ if ( current->domain == d )
+ med->target_producers--;
+ else
+ med->foreign_producers--;
mem_event_ring_unlock(med);
}
-int mem_event_check_ring(struct domain *d, struct mem_event_domain *med)
+/**
+ * mem_event_claim_slot - Check state of a mem_event ring
+ * @d: guest domain
+ * @med: mem_event ring
+ *
+ * Return codes: < 0: the ring is not yet configured
+ * 0: the ring has some room
+ * > 0: the ring is full
+ *
+ * mem_event_claim_slot() checks the state of the given mem_event ring.
+ * If the current vcpu belongs to the guest domain, the function assumes that
+ * mem_event_put_request() will sleep until the ring has room again.
+ * A guest can always place at least one request.
+ *
+ * If the current vcpu does not belong to the target domain the caller must try
+ * again until there is room. A slot is claimed and the caller can place a
+ * request. If the caller does not need to send a request, the claimed slot has
+ * to be released with mem_event_release_slot().
+ */
+int mem_event_claim_slot(struct domain *d, struct mem_event_domain *med)
{
- struct vcpu *curr = current;
- int free_requests;
+ int free_req;
int ring_full = 1;
if ( !med->ring_page )
@@ -198,16 +304,17 @@ int mem_event_check_ring(struct domain *
mem_event_ring_lock(med);
- free_requests = RING_FREE_REQUESTS(&med->front_ring);
- if ( med->req_producers < free_requests )
+ free_req = RING_FREE_REQUESTS(&med->front_ring);
+
+ if ( current->domain == d ) {
+ med->target_producers++;
+ ring_full = 0;
+ } else if ( med->foreign_producers + med->target_producers + 1 < free_req )
{
- med->req_producers++;
+ med->foreign_producers++;
ring_full = 0;
}
- if ( ring_full && (curr->domain == d) )
- mem_event_mark_and_pause(curr);
-
mem_event_ring_unlock(med);
return ring_full;
@@ -283,7 +390,7 @@ int mem_event_domctl(struct domain *d, x
break;
}
- rc = mem_event_enable(d, mec, med);
+ rc = mem_event_enable(d, mec, _VPF_mem_paging, med);
}
break;
@@ -322,7 +429,7 @@ int mem_event_domctl(struct domain *d, x
if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
break;
- rc = mem_event_enable(d, mec, med);
+ rc = mem_event_enable(d, mec, _VPF_mem_access, med);
}
break;
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -292,44 +292,32 @@ static void mem_sharing_audit(void)
#endif
-static struct page_info* mem_sharing_alloc_page(struct domain *d,
- unsigned long gfn,
- int must_succeed)
+static void mem_sharing_notify_helper(struct domain *d, unsigned long gfn)
{
- struct page_info* page;
struct vcpu *v = current;
- mem_event_request_t req;
+ mem_event_request_t req = { .type = MEM_EVENT_TYPE_SHARED };
- page = alloc_domheap_page(d, 0);
- if(page != NULL) return page;
-
- memset(&req, 0, sizeof(req));
- req.type = MEM_EVENT_TYPE_SHARED;
-
- if(must_succeed)
+ if ( v->domain != d )
{
- /* We do not support 'must_succeed' any more. External operations such
- * as grant table mappings may fail with OOM condition!
- */
- BUG();
- }
- else
- {
- /* All foreign attempts to unshare pages should be handled through
- * 'must_succeed' case. */
- ASSERT(v->domain->domain_id == d->domain_id);
- vcpu_pause_nosync(v);
- req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
+ /* XXX This path needs some attention. For now, just fail foreign
+ * XXX requests to unshare if there's no memory. This replaces
+ * XXX old code that BUG()ed here; the callers now BUG()
+ * XXX elsewhere. */
+ gdprintk(XENLOG_ERR,
+ "Failed alloc on unshare path for foreign (%d) lookup\n",
+ d->domain_id);
+ return;
}
- if(mem_event_check_ring(d, &d->mem_event->share)) return page;
+ if (mem_event_claim_slot(d, &d->mem_event->share) < 0)
+ return;
+ req.flags = MEM_EVENT_FLAG_VCPU_PAUSED;
req.gfn = gfn;
req.p2mt = p2m_ram_shared;
req.vcpu_id = v->vcpu_id;
mem_event_put_request(d, &d->mem_event->share, &req);
-
- return page;
+ vcpu_pause_nosync(v);
}
unsigned int mem_sharing_get_nr_saved_mfns(void)
@@ -692,14 +680,14 @@ gfn_found:
if(ret == 0) goto private_page_found;
old_page = page;
- page = mem_sharing_alloc_page(d, gfn, flags & MEM_SHARING_MUST_SUCCEED);
- BUG_ON(!page && (flags & MEM_SHARING_MUST_SUCCEED));
+ page = alloc_domheap_page(d, 0);
if(!page)
{
/* We've failed to obtain memory for private page. Need to re-add the
* gfn_info to relevant list */
list_add(&gfn_info->list, &hash_entry->gfns);
shr_unlock();
+ mem_sharing_notify_helper(d, gfn);
return -ENOMEM;
}
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -2988,21 +2988,13 @@ int p2m_mem_paging_evict(struct p2m_doma
*/
void p2m_mem_paging_drop_page(struct p2m_domain *p2m, unsigned long gfn)
{
- struct vcpu *v = current;
- mem_event_request_t req;
+ mem_event_request_t req = { .type = MEM_EVENT_TYPE_PAGING, .gfn = gfn };
struct domain *d = p2m->domain;
- /* Check that there's space on the ring for this request */
- if ( mem_event_check_ring(d, &d->mem_event->paging) == 0)
- {
- /* Send release notification to pager */
- memset(&req, 0, sizeof(req));
- req.flags |= MEM_EVENT_FLAG_DROP_PAGE;
- req.gfn = gfn;
- req.vcpu_id = v->vcpu_id;
+ /* Send release notification to pager */
+ req.flags = MEM_EVENT_FLAG_DROP_PAGE;
- mem_event_put_request(d, &d->mem_event->paging, &req);
- }
+ mem_event_put_request(d, &d->mem_event->paging, &req);
}
/**
@@ -3037,7 +3029,7 @@ void p2m_mem_paging_populate(struct p2m_
struct domain *d = p2m->domain;
/* Check that there's space on the ring for this request */
- if ( mem_event_check_ring(d, &d->mem_event->paging) )
+ if ( mem_event_claim_slot(d, &d->mem_event->paging) )
return;
memset(&req, 0, sizeof(req));
@@ -3070,7 +3062,7 @@ void p2m_mem_paging_populate(struct p2m_
else if ( restored || !p2m_do_populate(p2mt) )
{
/* gfn is already on its way back and vcpu is not paused */
- mem_event_put_req_producers(&d->mem_event->paging);
+ mem_event_release_slot(d, &d->mem_event->paging);
return;
}
@@ -3209,8 +3201,8 @@ void p2m_mem_paging_resume(struct p2m_do
if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
vcpu_unpause(d->vcpu[rsp.vcpu_id]);
- /* Unpause any domains that were paused because the ring was full */
- mem_event_unpause_vcpus(d);
+ /* Wake vcpus waiting for room in the ring */
+ mem_event_wake_requesters(&d->mem_event->paging);
}
void p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, unsigned long gla,
@@ -3239,7 +3231,7 @@ void p2m_mem_access_check(unsigned long
p2m_unlock(p2m);
/* Otherwise, check if there is a memory event listener, and send the message along */
- res = mem_event_check_ring(d, &d->mem_event->access);
+ res = mem_event_claim_slot(d, &d->mem_event->access);
if ( res < 0 )
{
/* No listener */
@@ -3249,7 +3241,7 @@ void p2m_mem_access_check(unsigned long
"Memory access permissions failure, no mem_event listener: pausing VCPU %d, dom %d\n",
v->vcpu_id, d->domain_id);
- mem_event_mark_and_pause(v);
+ mem_event_mark_and_sleep(v, &d->mem_event->access);
}
else
{
@@ -3299,9 +3291,11 @@ void p2m_mem_access_resume(struct p2m_do
if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
vcpu_unpause(d->vcpu[rsp.vcpu_id]);
- /* Unpause any domains that were paused because the ring was full or no listener
- * was available */
- mem_event_unpause_vcpus(d);
+ /* Wake vcpus waiting for room in the ring */
+ mem_event_wake_requesters(&d->mem_event->access);
+
+ /* Unpause all vcpus that were paused because no listener was available */
+ mem_event_wake_waiters(d, &d->mem_event->access);
}
#endif /* __x86_64__ */
--- a/xen/include/asm-x86/mem_event.h
+++ b/xen/include/asm-x86/mem_event.h
@@ -24,13 +24,13 @@
#ifndef __MEM_EVENT_H__
#define __MEM_EVENT_H__
-/* Pauses VCPU while marking pause flag for mem event */
-void mem_event_mark_and_pause(struct vcpu *v);
-int mem_event_check_ring(struct domain *d, struct mem_event_domain *med);
-void mem_event_put_req_producers(struct mem_event_domain *med);
+int mem_event_claim_slot(struct domain *d, struct mem_event_domain *med);
+void mem_event_release_slot(struct domain *d, struct mem_event_domain *med);
void mem_event_put_request(struct domain *d, struct mem_event_domain *med, mem_event_request_t *req);
void mem_event_get_response(struct mem_event_domain *med, mem_event_response_t *rsp);
-void mem_event_unpause_vcpus(struct domain *d);
+void mem_event_wake_requesters(struct mem_event_domain *med);
+void mem_event_wake_waiters(struct domain *d, struct mem_event_domain *med);
+void mem_event_mark_and_sleep(struct vcpu *v, struct mem_event_domain *med);
int mem_event_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
XEN_GUEST_HANDLE(void) u_domctl);
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -26,6 +26,7 @@
#include <xen/cpumask.h>
#include <xen/nodemask.h>
#include <xen/multicall.h>
+#include <xen/wait.h>
#ifdef CONFIG_COMPAT
#include <compat/vcpu.h>
@@ -190,7 +191,8 @@ struct mem_event_domain
{
/* ring lock */
spinlock_t ring_lock;
- unsigned int req_producers;
+ unsigned short foreign_producers;
+ unsigned short target_producers;
/* shared page */
mem_event_shared_page_t *shared_page;
/* shared ring page */
@@ -199,6 +201,10 @@ struct mem_event_domain
mem_event_front_ring_t front_ring;
/* event channel port (vcpu0 only) */
int xen_port;
+ /* mem_event bit for vcpu->pause_flags */
+ int pause_flag;
+ /* list of vcpus waiting for room in the ring */
+ struct waitqueue_head wq;
};
struct mem_event_per_domain
@@ -601,9 +607,12 @@ extern struct domain *domain_list;
/* VCPU affinity has changed: migrating to a new CPU. */
#define _VPF_migrating 3
#define VPF_migrating (1UL<<_VPF_migrating)
- /* VCPU is blocked on memory-event ring. */
-#define _VPF_mem_event 4
-#define VPF_mem_event (1UL<<_VPF_mem_event)
+ /* VCPU is blocked due to missing mem_paging ring. */
+#define _VPF_mem_paging 4
+#define VPF_mem_paging (1UL<<_VPF_mem_paging)
+ /* VCPU is blocked due to missing mem_access ring. */
+#define _VPF_mem_access 5
+#define VPF_mem_access (1UL<<_VPF_mem_access)
static inline int vcpu_runnable(struct vcpu *v)
{