285 lines
9.5 KiB
Diff
285 lines
9.5 KiB
Diff
|
# Commit dff515dfeac4c1c13422a128c558ac21ddc6c8db
|
||
|
# Date 2015-06-19 11:01:24 +0200
|
||
|
# Author Malcolm Crossley <malcolm.crossley@citrix.com>
|
||
|
# Committer Jan Beulich <jbeulich@suse.com>
|
||
|
gnttab: use per-VCPU maptrack free lists
|
||
|
|
||
|
Performance analysis of aggregate network throughput with many VMs
|
||
|
shows that performance is signficantly limited by contention on the
|
||
|
maptrack lock when obtaining/releasing maptrack handles from the free
|
||
|
list.
|
||
|
|
||
|
Instead of a single free list use a per-VCPU list. This avoids any
|
||
|
contention when obtaining a handle. Handles must be released back to
|
||
|
their original list and since this may occur on a different VCPU there
|
||
|
is some contention on the destination VCPU's free list tail pointer
|
||
|
(but this is much better than a per-domain lock).
|
||
|
|
||
|
Increase the default maximum number of maptrack frames by 4 times
|
||
|
because: a) struct grant_mapping is now 16 bytes (instead of 8); and
|
||
|
b) a guest may not evenly distribute all the grant map operations
|
||
|
across the VCPUs (meaning some VCPUs need more maptrack entries than
|
||
|
others).
|
||
|
|
||
|
Signed-off-by: Malcolm Crossley <malcolm.crossley@citrix.com>
|
||
|
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
|
||
|
Reviewed-by: Jan Beulich <jbeulich@suse.com>
|
||
|
|
||
|
--- sle12sp1.orig/xen/common/domain.c 2015-07-08 00:00:00.000000000 +0200
|
||
|
+++ sle12sp1/xen/common/domain.c 2015-07-08 13:52:23.000000000 +0200
|
||
|
@@ -126,6 +126,8 @@ struct vcpu *alloc_vcpu(
|
||
|
|
||
|
tasklet_init(&v->continue_hypercall_tasklet, NULL, 0);
|
||
|
|
||
|
+ grant_table_init_vcpu(v);
|
||
|
+
|
||
|
if ( !zalloc_cpumask_var(&v->cpu_hard_affinity) ||
|
||
|
!zalloc_cpumask_var(&v->cpu_hard_affinity_tmp) ||
|
||
|
!zalloc_cpumask_var(&v->cpu_hard_affinity_saved) ||
|
||
|
--- sle12sp1.orig/xen/common/grant_table.c 2015-07-08 13:49:47.000000000 +0200
|
||
|
+++ sle12sp1/xen/common/grant_table.c 2015-07-08 13:52:23.000000000 +0200
|
||
|
@@ -37,6 +37,7 @@
|
||
|
#include <xen/iommu.h>
|
||
|
#include <xen/paging.h>
|
||
|
#include <xen/keyhandler.h>
|
||
|
+#include <xen/vmap.h>
|
||
|
#include <xsm/xsm.h>
|
||
|
#include <asm/flushtlb.h>
|
||
|
|
||
|
@@ -57,7 +58,7 @@ integer_param("gnttab_max_frames", max_g
|
||
|
* New options allow to set max_maptrack_frames and
|
||
|
* map_grant_table_frames independently.
|
||
|
*/
|
||
|
-#define DEFAULT_MAX_MAPTRACK_FRAMES 256
|
||
|
+#define DEFAULT_MAX_MAPTRACK_FRAMES 1024
|
||
|
|
||
|
static unsigned int __read_mostly max_maptrack_frames;
|
||
|
integer_param("gnttab_max_maptrack_frames", max_maptrack_frames);
|
||
|
@@ -279,62 +280,103 @@ double_gt_unlock(struct grant_table *lgt
|
||
|
|
||
|
static inline int
|
||
|
__get_maptrack_handle(
|
||
|
- struct grant_table *t)
|
||
|
+ struct grant_table *t,
|
||
|
+ struct vcpu *v)
|
||
|
{
|
||
|
- unsigned int h;
|
||
|
- if ( unlikely((h = t->maptrack_head) == MAPTRACK_TAIL) )
|
||
|
+ unsigned int head, next;
|
||
|
+
|
||
|
+ /* No maptrack pages allocated for this VCPU yet? */
|
||
|
+ head = v->maptrack_head;
|
||
|
+ if ( unlikely(head == MAPTRACK_TAIL) )
|
||
|
return -1;
|
||
|
- t->maptrack_head = maptrack_entry(t, h).ref;
|
||
|
- return h;
|
||
|
+
|
||
|
+ /*
|
||
|
+ * Always keep one entry in the free list to make it easier to add
|
||
|
+ * free entries to the tail.
|
||
|
+ */
|
||
|
+ next = read_atomic(&maptrack_entry(t, head).ref);
|
||
|
+ if ( unlikely(next == MAPTRACK_TAIL) )
|
||
|
+ return -1;
|
||
|
+
|
||
|
+ v->maptrack_head = next;
|
||
|
+
|
||
|
+ return head;
|
||
|
}
|
||
|
|
||
|
static inline void
|
||
|
put_maptrack_handle(
|
||
|
struct grant_table *t, int handle)
|
||
|
{
|
||
|
- spin_lock(&t->maptrack_lock);
|
||
|
- maptrack_entry(t, handle).ref = t->maptrack_head;
|
||
|
- t->maptrack_head = handle;
|
||
|
- spin_unlock(&t->maptrack_lock);
|
||
|
+ struct domain *currd = current->domain;
|
||
|
+ struct vcpu *v;
|
||
|
+ unsigned int prev_tail, cur_tail;
|
||
|
+
|
||
|
+ /* 1. Set entry to be a tail. */
|
||
|
+ maptrack_entry(t, handle).ref = MAPTRACK_TAIL;
|
||
|
+
|
||
|
+ /* 2. Add entry to the tail of the list on the original VCPU. */
|
||
|
+ v = currd->vcpu[maptrack_entry(t, handle).vcpu];
|
||
|
+
|
||
|
+ cur_tail = read_atomic(&v->maptrack_tail);
|
||
|
+ do {
|
||
|
+ prev_tail = cur_tail;
|
||
|
+ cur_tail = cmpxchg(&v->maptrack_tail, prev_tail, handle);
|
||
|
+ } while ( cur_tail != prev_tail );
|
||
|
+
|
||
|
+ /* 3. Update the old tail entry to point to the new entry. */
|
||
|
+ write_atomic(&maptrack_entry(t, prev_tail).ref, handle);
|
||
|
}
|
||
|
|
||
|
static inline int
|
||
|
get_maptrack_handle(
|
||
|
struct grant_table *lgt)
|
||
|
{
|
||
|
+ struct vcpu *curr = current;
|
||
|
int i;
|
||
|
grant_handle_t handle;
|
||
|
struct grant_mapping *new_mt;
|
||
|
- unsigned int new_mt_limit, nr_frames;
|
||
|
+
|
||
|
+ handle = __get_maptrack_handle(lgt, curr);
|
||
|
+ if ( likely(handle != -1) )
|
||
|
+ return handle;
|
||
|
|
||
|
spin_lock(&lgt->maptrack_lock);
|
||
|
|
||
|
- while ( unlikely((handle = __get_maptrack_handle(lgt)) == -1) )
|
||
|
+ if ( nr_maptrack_frames(lgt) >= max_maptrack_frames )
|
||
|
{
|
||
|
- nr_frames = nr_maptrack_frames(lgt);
|
||
|
- if ( nr_frames >= max_maptrack_frames )
|
||
|
- break;
|
||
|
+ spin_unlock(&lgt->maptrack_lock);
|
||
|
+ return -1;
|
||
|
+ }
|
||
|
|
||
|
- new_mt = alloc_xenheap_page();
|
||
|
- if ( !new_mt )
|
||
|
- break;
|
||
|
+ new_mt = alloc_xenheap_page();
|
||
|
+ if ( !new_mt )
|
||
|
+ {
|
||
|
+ spin_unlock(&lgt->maptrack_lock);
|
||
|
+ return -1;
|
||
|
+ }
|
||
|
+ clear_page(new_mt);
|
||
|
|
||
|
- clear_page(new_mt);
|
||
|
+ /*
|
||
|
+ * Use the first new entry and add the remaining entries to the
|
||
|
+ * head of the free list.
|
||
|
+ */
|
||
|
+ handle = lgt->maptrack_limit;
|
||
|
|
||
|
- new_mt_limit = lgt->maptrack_limit + MAPTRACK_PER_PAGE;
|
||
|
+ for ( i = 0; i < MAPTRACK_PER_PAGE; i++ )
|
||
|
+ {
|
||
|
+ new_mt[i].ref = handle + i + 1;
|
||
|
+ new_mt[i].vcpu = curr->vcpu_id;
|
||
|
+ }
|
||
|
+ new_mt[i - 1].ref = curr->maptrack_head;
|
||
|
|
||
|
- for ( i = 1; i < MAPTRACK_PER_PAGE; i++ )
|
||
|
- new_mt[i - 1].ref = lgt->maptrack_limit + i;
|
||
|
- new_mt[i - 1].ref = lgt->maptrack_head;
|
||
|
- lgt->maptrack_head = lgt->maptrack_limit;
|
||
|
+ /* Set tail directly if this is the first page for this VCPU. */
|
||
|
+ if ( curr->maptrack_tail == MAPTRACK_TAIL )
|
||
|
+ curr->maptrack_tail = handle + MAPTRACK_PER_PAGE - 1;
|
||
|
|
||
|
- lgt->maptrack[nr_frames] = new_mt;
|
||
|
- smp_wmb();
|
||
|
- lgt->maptrack_limit = new_mt_limit;
|
||
|
+ curr->maptrack_head = handle + 1;
|
||
|
|
||
|
- gdprintk(XENLOG_INFO, "Increased maptrack size to %u frames\n",
|
||
|
- nr_frames + 1);
|
||
|
- }
|
||
|
+ lgt->maptrack[nr_maptrack_frames(lgt)] = new_mt;
|
||
|
+ lgt->maptrack_limit += MAPTRACK_PER_PAGE;
|
||
|
|
||
|
spin_unlock(&lgt->maptrack_lock);
|
||
|
|
||
|
@@ -2919,16 +2961,9 @@ grant_table_create(
|
||
|
}
|
||
|
|
||
|
/* Tracking of mapped foreign frames table */
|
||
|
- if ( (t->maptrack = xzalloc_array(struct grant_mapping *,
|
||
|
- max_maptrack_frames)) == NULL )
|
||
|
+ t->maptrack = vzalloc(max_maptrack_frames * sizeof(*t->maptrack));
|
||
|
+ if ( t->maptrack == NULL )
|
||
|
goto no_mem_2;
|
||
|
- if ( (t->maptrack[0] = alloc_xenheap_page()) == NULL )
|
||
|
- goto no_mem_3;
|
||
|
- clear_page(t->maptrack[0]);
|
||
|
- t->maptrack_limit = MAPTRACK_PER_PAGE;
|
||
|
- for ( i = 1; i < MAPTRACK_PER_PAGE; i++ )
|
||
|
- t->maptrack[0][i - 1].ref = i;
|
||
|
- t->maptrack[0][i - 1].ref = MAPTRACK_TAIL;
|
||
|
|
||
|
/* Shared grant table. */
|
||
|
if ( (t->shared_raw = xzalloc_array(void *, max_grant_frames)) == NULL )
|
||
|
@@ -2960,8 +2995,7 @@ grant_table_create(
|
||
|
free_xenheap_page(t->shared_raw[i]);
|
||
|
xfree(t->shared_raw);
|
||
|
no_mem_3:
|
||
|
- free_xenheap_page(t->maptrack[0]);
|
||
|
- xfree(t->maptrack);
|
||
|
+ vfree(t->maptrack);
|
||
|
no_mem_2:
|
||
|
for ( i = 0;
|
||
|
i < num_act_frames_from_sha_frames(INITIAL_NR_GRANT_FRAMES); i++ )
|
||
|
@@ -3096,7 +3130,7 @@ grant_table_destroy(
|
||
|
|
||
|
for ( i = 0; i < nr_maptrack_frames(t); i++ )
|
||
|
free_xenheap_page(t->maptrack[i]);
|
||
|
- xfree(t->maptrack);
|
||
|
+ vfree(t->maptrack);
|
||
|
|
||
|
for ( i = 0; i < nr_active_grant_frames(t); i++ )
|
||
|
free_xenheap_page(t->active[i]);
|
||
|
@@ -3110,6 +3144,12 @@ grant_table_destroy(
|
||
|
d->grant_table = NULL;
|
||
|
}
|
||
|
|
||
|
+void grant_table_init_vcpu(struct vcpu *v)
|
||
|
+{
|
||
|
+ v->maptrack_head = MAPTRACK_TAIL;
|
||
|
+ v->maptrack_tail = MAPTRACK_TAIL;
|
||
|
+}
|
||
|
+
|
||
|
static void gnttab_usage_print(struct domain *rd)
|
||
|
{
|
||
|
int first = 1;
|
||
|
--- sle12sp1.orig/xen/include/xen/grant_table.h 2015-07-08 13:49:47.000000000 +0200
|
||
|
+++ sle12sp1/xen/include/xen/grant_table.h 2015-07-08 13:52:23.000000000 +0200
|
||
|
@@ -60,6 +60,8 @@ struct grant_mapping {
|
||
|
u32 ref; /* grant ref */
|
||
|
u16 flags; /* 0-4: GNTMAP_* ; 5-15: unused */
|
||
|
domid_t domid; /* granting domain */
|
||
|
+ u32 vcpu; /* vcpu which created the grant mapping */
|
||
|
+ u32 pad; /* round size to a power of 2 */
|
||
|
};
|
||
|
|
||
|
/* Per-domain grant information. */
|
||
|
@@ -83,9 +85,8 @@ struct grant_table {
|
||
|
grant_status_t **status;
|
||
|
/* Active grant table. */
|
||
|
struct active_grant_entry **active;
|
||
|
- /* Mapping tracking table. */
|
||
|
+ /* Mapping tracking table per vcpu. */
|
||
|
struct grant_mapping **maptrack;
|
||
|
- unsigned int maptrack_head;
|
||
|
unsigned int maptrack_limit;
|
||
|
/* Lock protecting the maptrack page list, head, and limit */
|
||
|
spinlock_t maptrack_lock;
|
||
|
@@ -99,6 +100,7 @@ int grant_table_create(
|
||
|
struct domain *d);
|
||
|
void grant_table_destroy(
|
||
|
struct domain *d);
|
||
|
+void grant_table_init_vcpu(struct vcpu *v);
|
||
|
|
||
|
/* Domain death release of granted mappings of other domains' memory. */
|
||
|
void
|
||
|
--- sle12sp1.orig/xen/include/xen/sched.h 2015-01-14 18:44:18.000000000 +0100
|
||
|
+++ sle12sp1/xen/include/xen/sched.h 2015-07-08 13:52:23.000000000 +0200
|
||
|
@@ -219,6 +219,10 @@ struct vcpu
|
||
|
/* VCPU paused by system controller. */
|
||
|
int controller_pause_count;
|
||
|
|
||
|
+ /* Maptrack */
|
||
|
+ unsigned int maptrack_head;
|
||
|
+ unsigned int maptrack_tail;
|
||
|
+
|
||
|
/* IRQ-safe virq_lock protects against delivering VIRQ to stale evtchn. */
|
||
|
evtchn_port_t virq_to_evtchn[NR_VIRQS];
|
||
|
spinlock_t virq_lock;
|